"TRIP_ID", "START_POINT", "GRID_POLYLINE",
                                 "TRUNC_GRID_POLYLINE"
                             ],
                             converters={
                                 "START_POINT": lambda x: eval(x),
                                 "GRID_POLYLINE": lambda x: eval(x),
                                 "TRUNC_GRID_POLYLINE": lambda x: eval(x)
                             })

    # Build a destination distribution for every trip in the validation set.
    for nr, partial_trip in validation.iterrows():
        # The trailing slice [-k:] of the truncated trip is the pattern that
        # training trips are matched against.
        end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

        # Loop-invariant for this trip: its first cell, used below to filter
        # the training trips. Looked up once instead of once per chunk.
        start_cell = partial_trip.GRID_POLYLINE[0]

        # Fresh grid per trip; it accumulates one count per matching
        # training trip, keyed by that trip's final cell.
        posterior = DestinationGrid(N, M)

        # NOTE(review): `train` is iterated again for every validation trip.
        # If it is a one-shot iterator (e.g. a chunked CSV reader) it will be
        # exhausted after the first trip -- confirm it is re-iterable.
        for chunk in train:
            # Only look at trips that start from the same position
            trips = chunk[chunk.START_CELL == start_cell]

            # Only the polyline column is read, so iterate it directly
            # rather than materialising a Series per row with iterrows().
            for polyline in trips.GRID_POLYLINE:
                if match(end, polyline, k):
                    # The last cell of a matching trip is its destination.
                    posterior._table[polyline[-1]] += 1

        posterior.normalizeProbs()

        # Export the probabilities
        dests = [id_to_nr(dest) for dest in posterior]
Exemplo n.º 2
0
  # Run S random walks; each walk contributes the destination it reports.
  for _ in xrange(S):
    destinations.append(
      Walker(lattice = ltc, start = start_simulation, dest_threshold = 1).simulateWalker())

  # Tally the destinations with pandas: one row per walk, each counted once.
  table = pd.DataFrame({"DEST": pd.Series(destinations)})
  table["COUNT"] = 1

  # Sum the counts per destination, then scale them so they add up to one.
  prob_table = table.groupby(["DEST"], as_index = False).agg({"COUNT": "sum"})
  prob_table["COUNT"] = prob_table["COUNT"] / prob_table["COUNT"].sum()

  # Copy the probabilities into a DestinationGrid object.
  # NOTE(review): iterrows() yields each row as a single Series, so DEST and
  # COUNT share one dtype here -- confirm setProb accepts the resulting type.
  posterior = DestinationGrid(N, M)
  for _, entry in prob_table.iterrows():
    posterior.setProb(entry.DEST, entry.COUNT)

  # Draw two side-by-side panels: the log of the grid plus the complete trip,
  # then the log of the grid plus the truncated trip.
  panels = (
    (1, partial_trip.GRID_POLYLINE, "Complete trip superimposed"),
    (2, partial_trip.TRUNC_GRID_POLYLINE, "Partial trip superimposed"),
  )
  for position, polyline, caption in panels:
    plt.subplot(1, 2, position)
    plt.imshow(np.log(posterior.as_array() + trip_to_array(polyline, N, M)), interpolation = "nearest")
    plt.title(caption)

  
Exemplo n.º 3
0
        # Initialize the walker
        wlk = Walker(lattice=ltc, start=start_simulation, dest_threshold=1)

        # Simulate a random walk and record the destination
        destinations.append(wlk.simulateWalker())

    # Tally the recorded destinations: one row per entry, each counted once.
    table = pd.DataFrame({"DEST": pd.Series(destinations)})
    table["COUNT"] = 1

    # Sum the counts per destination and rescale them to add up to one.
    by_destination = table.groupby(["DEST"], as_index=False)
    prob_table = by_destination.agg({"COUNT": "sum"})
    prob_table["COUNT"] = prob_table["COUNT"] / prob_table["COUNT"].sum()

    # Copy the probabilities into a DestinationGrid object.
    # NOTE(review): iterrows() yields each row as a single Series, so DEST
    # and COUNT share one dtype here -- confirm setProb accepts that type.
    posterior = DestinationGrid(N, M)
    for _, entry in prob_table.iterrows():
        posterior.setProb(entry.DEST, entry.COUNT)

    # Left panel: log of the grid plus the complete trip.
    plt.subplot(1, 2, 1)
    complete_overlay = posterior.as_array() + trip_to_array(
        partial_trip.GRID_POLYLINE, N, M)
    plt.imshow(np.log(complete_overlay), interpolation="nearest")
    plt.title("Complete trip superimposed")

    # Right panel: log of the grid plus the truncated trip.
    plt.subplot(1, 2, 2)
    partial_overlay = posterior.as_array() + trip_to_array(
        partial_trip.TRUNC_GRID_POLYLINE, N, M)
    plt.imshow(np.log(partial_overlay), interpolation="nearest")
 
 # Import the validation data, keeping only the four columns used below.
 # NOTE(security): the converters run eval() on raw CSV cell text, which
 # executes arbitrary expressions. That is acceptable only for trusted files;
 # if these columns hold plain literals, ast.literal_eval is the safe
 # parser -- confirm before switching.
 validation = pd.read_csv(filepath_or_buffer = filepath_val,
                          sep = ",",
                          usecols = ["TRIP_ID", "START_POINT", "GRID_POLYLINE", "TRUNC_GRID_POLYLINE"],
                          converters = {"START_POINT": lambda x: eval(x),
                                        "GRID_POLYLINE": lambda x: eval(x),
                                        "TRUNC_GRID_POLYLINE": lambda x: eval(x)})

 # Build a destination distribution for every trip in the validation set.
 for nr, partial_trip in validation.iterrows():
   # The trailing slice [-k:] of the truncated trip is the pattern that
   # training trips are matched against.
   end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

   # Loop-invariant for this trip: its first cell, used below to filter
   # the training trips. Looked up once instead of once per chunk.
   start_cell = partial_trip.GRID_POLYLINE[0]

   # Fresh grid per trip; it accumulates one count per matching training
   # trip, keyed by that trip's final cell.
   posterior = DestinationGrid(N, M)

   # NOTE(review): `train` is iterated again for every validation trip. If
   # it is a one-shot iterator (e.g. a chunked CSV reader) it will be
   # exhausted after the first trip -- confirm it is re-iterable.
   for chunk in train:
     # Only look at trips that start from the same position
     trips = chunk[chunk.START_CELL == start_cell]

     # Only the polyline column is read, so iterate it directly rather
     # than materialising a Series per row with iterrows().
     for polyline in trips.GRID_POLYLINE:
       if match(end, polyline, k):
         # The last cell of a matching trip is its destination.
         posterior._table[polyline[-1]] += 1

   posterior.normalizeProbs()

   # Export the probabilities
   dests = [id_to_nr(dest) for dest in posterior]
        {  #"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x),
            "POLYLINE": lambda x: json.loads(x),
            "START_POINT": lambda x: eval(x),
            "GRID_POLYLINE": lambda x: eval(x),
            "TRUNC_POLYLINE": lambda x: eval(x),
            "TRUNC_GRID_POLYLINE": lambda x: eval(x)
        })

    # Select a partial trip: the row of `test` labelled 0.
    partial_trip = test.loc[0]

    # The trailing slice [-k:] of the truncated trip is the pattern that
    # training trips are matched against.
    end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

    # Grid that accumulates one count per matching training trip, keyed by
    # that trip's final cell.
    grid = DestinationGrid(N, M)

    # Match this segment with trips in the training set, chunk by chunk.
    for i, chunk in enumerate(trips):
        # Only the polyline column is read, so iterate it directly rather
        # than materialising a Series per row with iterrows().
        for polyline in chunk.GRID_POLYLINE:
            if match(end, polyline, k):
                grid._table[polyline[-1]] += 1

        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print("Processed chunk %d" % i)

    grid.normalizeProbs()

    # Plot the new distribution
    plt.subplot(1, 2, 1)
    plt.imshow(np.log(grid.as_array() +
Exemplo n.º 6
0
    # Flag every training trip whose polyline matches the partial trip's
    # last segment. The column is built in one pass instead of starting at
    # False and patching rows with DataFrame.set_value (deprecated in pandas
    # 0.21, removed in 1.0) while iterating the same frame with iterrows().
    # The explicit bool dtype keeps the column usable as a mask even when
    # `trips` is empty.
    trips["MATCH"] = np.array(
        [bool(match(end, polyline, k)) for polyline in trips.GRID_POLYLINE],
        dtype=bool)

    # We are only really interested in the matched trips now
    trips = trips[trips.MATCH]

    # Compute the destination distribution: summing the boolean flag counts
    # the matched trips per destination cell; dividing by the total scales
    # the counts so they add up to one.
    trips_agg = trips.groupby(["DEST_CELL"],
                              as_index=False).aggregate({"MATCH": "sum"})
    trips_agg.MATCH = trips_agg.MATCH / np.sum(trips_agg.MATCH)

    # Store it in a DestinationGrid object
    grid = DestinationGrid(N, M)
    grid.setProbs(trips_agg.DEST_CELL.values, trips_agg.MATCH.values)

    # Plot the new distribution: log of the grid plus the complete trip on
    # the left, log of the grid plus the truncated trip on the right.
    plt.subplot(1, 2, 1)
    plt.imshow(np.log(grid.as_array() +
                      trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
               interpolation="nearest")
    plt.title("Complete trip superimposed")

    plt.subplot(1, 2, 2)
    plt.imshow(np.log(grid.as_array() +
                      trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)),
               interpolation="nearest")
    plt.title("Partial trip superimposed")
                    nrows = 10,
                    converters = {#"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x),
                                  "POLYLINE": lambda x: json.loads(x),
                                  "START_POINT": lambda x: eval(x),
                                  "GRID_POLYLINE": lambda x: eval(x),
                                  "TRUNC_POLYLINE": lambda x: eval(x),
                                  "TRUNC_GRID_POLYLINE": lambda x: eval(x)})
                                                                     
 # Select a partial trip: the row of `test` labelled 0.
 partial_trip = test.loc[0]

 # The trailing slice [-k:] of the truncated trip is the pattern that
 # training trips are matched against.
 end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

 # Grid that accumulates one count per matching training trip, keyed by
 # that trip's final cell.
 grid = DestinationGrid(N, M)

 # Match this segment with trips in the training set, chunk by chunk.
 for i, chunk in enumerate(trips):
   # Only the polyline column is read, so iterate it directly rather than
   # materialising a Series per row with iterrows().
   for polyline in chunk.GRID_POLYLINE:
     if match(end, polyline, k):
       grid._table[polyline[-1]] += 1

   # Parenthesised single-argument form prints the same text under both
   # Python 2 and Python 3.
   print("Processed chunk %d" % i)

 grid.normalizeProbs()

 # Plot the new distribution: log of the grid plus the complete trip.
 plt.subplot(1,2,1)
 plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)), interpolation = "nearest")
Exemplo n.º 8
0
 
 # The trailing slice [-k:] of the truncated trip is the pattern that
 # training trips are matched against.
 end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

 # Flag every training trip whose polyline matches that segment. The column
 # is built in one pass instead of starting at False and patching rows with
 # DataFrame.set_value (deprecated in pandas 0.21, removed in 1.0) while
 # iterating the same frame with iterrows(). The explicit bool dtype keeps
 # the column usable as a mask even when `trips` is empty.
 trips["MATCH"] = np.array(
   [bool(match(end, polyline, k)) for polyline in trips.GRID_POLYLINE],
   dtype = bool)

 # We are only really interested in the matched trips now
 trips = trips[trips.MATCH]

 # Compute the destination distribution: summing the boolean flag counts the
 # matched trips per destination cell; dividing by the total scales the
 # counts so they add up to one.
 trips_agg = trips.groupby(["DEST_CELL"], as_index = False).aggregate({"MATCH": "sum"})
 trips_agg.MATCH = trips_agg.MATCH / np.sum(trips_agg.MATCH)

 # Store it in a DestinationGrid object
 grid = DestinationGrid(N, M)
 grid.setProbs(trips_agg.DEST_CELL.values, trips_agg.MATCH.values)

 # Plot the new distribution: log of the grid plus the complete trip on the
 # left, log of the grid plus the truncated trip on the right.
 plt.subplot(1,2,1)
 plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)), interpolation = "nearest")
 plt.title("Complete trip superimposed")

 plt.subplot(1,2,2)
 plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)), interpolation = "nearest")
 plt.title("Partial trip superimposed")