Exemplo n.º 1
0
 def get_reps(self):
     """Build one representative trip per cluster and store them in self.reps.

     ``self.reps`` is reset to an empty list first.  When ``self.data`` is
     empty nothing else happens.  Otherwise each cluster in
     ``self.clusters`` contributes a ``Trip`` whose first six arguments are
     ``None`` and whose start and end ``Coordinate`` are the mean start and
     mean end point of the cluster's members.
     """
     self.reps = []
     if not self.data:
         return
     for cluster in self.clusters:
         start_lats, start_lons, end_lats, end_lons = [], [], [], []
         for member in cluster:
             if self.is_old:
                 start_lats.append(member.trip_start_location.lat)
                 start_lons.append(member.trip_start_location.lon)
                 end_lats.append(member.trip_end_location.lat)
                 end_lons.append(member.trip_end_location.lon)
             else:
                 # New-style entries hold geojson points, which are ordered
                 # (lon, lat).  Read index 1 before index 0 so the rows
                 # stay (lat, lon) like the old-style branch above.
                 start_lats.append(member.data.start_loc["coordinates"][1])
                 start_lons.append(member.data.start_loc["coordinates"][0])
                 end_lats.append(member.data.end_loc["coordinates"][1])
                 end_lons.append(member.data.end_loc["coordinates"][0])
         # One mean per row: start lat, start lon, end lat, end lon.
         mean_start_lat, mean_start_lon, mean_end_lat, mean_end_lon = numpy.mean(
             [start_lats, start_lons, end_lats, end_lons], axis=1)
         representative = Trip(None, None, None, None, None, None,
                               Coordinate(mean_start_lat, mean_start_lon),
                               Coordinate(mean_end_lat, mean_end_lon))
         self.reps.append(representative)
Exemplo n.º 2
0
    def testBinData(self):
        """Exercise bin_data on empty data, the fixture data and two trip groups."""
        # With no data there must be no bins.
        empty_sim = similarity.similarity([], 300)
        self.assertTrue(not empty_sim.bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()
        # The bin sizes add up to the number of data items ...
        binned_total = 0
        for members in sim.bins:
            binned_total += len(members)
        self.assertTrue(binned_total == len(sim.data))
        # ... and no bin member appears twice across the bins.
        distinct_members = {member for members in sim.bins
                            for member in members}
        self.assertTrue(len(distinct_members) == len(sim.data))
        # Bins are ordered from largest to smallest.
        for idx in range(len(sim.bins) - 1):
            self.assertTrue(len(sim.bins[idx]) >= len(sim.bins[idx + 1]))

        # Two groups of ten identical trips each are expected to give
        # exactly two bins.
        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        data = [Trip(None, None, None, None, now, now, start, end)
                for _ in range(10)]
        start = Coordinate(41, -74)
        end = Coordinate(42, -74)
        data.extend(Trip(None, None, None, None, now, now, start, end)
                    for _ in range(10))
        grouped_sim = similarity.similarity(data, 300)
        grouped_sim.bin_data()
        self.assertTrue(len(grouped_sim.bins) == 2)
Exemplo n.º 3
0
 def testElbowDistance(self):
     """delete_bins on six hand-made bins is expected to leave sim.num == 2."""
     now = datetime.datetime.now()
     trip = Trip(None, None, None, None, now, now,
                 Coordinate(47, -122), Coordinate(47, -123))
     # Eleven references to the same trip; the bins below hold indices 0-10.
     sim = similarity.similarity([trip] * 11, 300)
     # Install the bins directly instead of computing them with bin_data.
     sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim.delete_bins()
     self.assertTrue(sim.num == 2)
Exemplo n.º 4
0
 def testDistance(self):
     """sim.distance is truthy for the first end point and falsy for the second."""
     # NOTE(review): the first argument here is about -122 and the second
     # about 37, the reverse of the magnitude pattern in calls such as
     # Coordinate(47, -122) elsewhere in this file -- confirm the intended
     # argument order.
     origin = Coordinate(-122.259447, 37.875174)
     first_end = Coordinate(-122.259279, 37.875479)
     second_end = Coordinate(-122.252287, 37.869569)
     now = datetime.datetime.now()
     # These trips are constructed but not read by the assertions below.
     first_trip = Trip(None, None, None, None, now, now, origin, first_end)
     second_trip = Trip(None, None, None, None, now, now, origin, second_end)
     sim = similarity.similarity(self.data, 300)
     first_result = sim.distance(origin.lat, origin.lon,
                                 first_end.lat, first_end.lon)
     self.assertTrue(first_result)
     second_result = sim.distance(origin.lat, origin.lon,
                                  second_end.lat, second_end.lon)
     self.assertTrue(not second_result)
Exemplo n.º 5
0
    def testInit(self):
        """Check constructor behaviour for a bad radius, empty data and real trips."""
        # A non-numeric radius may raise ValueError; any other exception
        # type fails the test.
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass
        except Exception:
            self.assertTrue(False)

        empty_sim = similarity.similarity([], 100)
        self.assertTrue(len(empty_sim.data) == 0)

        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        # This trip starts and ends at the same coordinate.
        same_point_trip = Trip(None, None, None, None, now, now, start, start)
        moving_trip = Trip(None, None, None, None, now, now, start, end)
        # The same-point trip is expected to make no difference to sim.data.
        with_both = similarity.similarity([same_point_trip, moving_trip], 100)
        moving_only = similarity.similarity([moving_trip], 100)
        self.assertTrue(with_both.data == moving_only.data)
Exemplo n.º 6
0
 def geocode(cls, address):
     """Return a Coordinate for ``address``.

     The first entry returned by ``cls.get_json_geo(address)`` supplies the
     "lat" and "lon" values.  If anything in that lookup raises, the
     exception is printed and ``_do_google_geo(address)`` is used instead.
     """
     try:
         first_hit = cls.get_json_geo(address)[0]
         return Coordinate(float(first_hit["lat"]), float(first_hit["lon"]))
     except Exception as err:
         print(err)
         print("defaulting")
         # TODO: Right now there is no default geocoder. Discuss if we
         # should create a google account for this.
         return _do_google_geo(address)  # If we fail ask the gods
Exemplo n.º 7
0
    def testCluster(self):
        """Exercise featurization.cluster with several argument combinations."""
        feat = featurization.featurization(self.data)

        def check_labels(current):
            # One label per point, and the cluster count equals the number
            # of distinct labels.
            self.assertTrue(len(current.labels) == len(current.points))
            self.assertTrue(current.clusters == len(set(current.labels)))

        feat.cluster(min_clusters=2, max_clusters=10)
        check_labels(feat)
        kmeans_result = feat.cluster(name='kmeans', min_clusters=5,
                                     max_clusters=20)
        check_labels(feat)
        # An unknown method name is expected to default to kmeans.
        unknown_result = feat.cluster(name='nonname', min_clusters=5,
                                      max_clusters=20)
        self.assertTrue(kmeans_result == unknown_result)
        feat.cluster(min_clusters=len(self.data) + 1)
        # min_clusters of 0 and of 2 are expected to give the same result.
        from_zero = feat.cluster(min_clusters=0, max_clusters=20)
        from_two = feat.cluster(min_clusters=2, max_clusters=20)
        self.assertTrue(from_zero == from_two)
        # min above max may raise ValueError; any other exception type fails.
        try:
            feat.cluster(min_clusters=10, max_clusters=2)
        except ValueError:
            pass
        except Exception:
            self.assertTrue(False)

        def ten_trips(start, end):
            # Ten trips sharing the same start/end objects, each stamped
            # with its own "now".
            trips = []
            for _ in range(10):
                now = datetime.datetime.now()
                trips.append(
                    Trip(None, None, None, None, now, now, start, end))
            return trips

        data = ten_trips(Coordinate(47, -122), Coordinate(47, -123))
        data += ten_trips(Coordinate(41, -74), Coordinate(42, -74))
        feat = featurization.featurization(data)
        feat.cluster()
        # Two groups of identical trips should produce two distinct labels.
        self.assertTrue(len(set(feat.labels)) == 2)
Exemplo n.º 8
0
def get_one_random_point_in_radius(address, radius):
    """Return a random Coordinate within ``radius`` of a geocoded address.

    Args:
        address: Passed to ``geocode_address`` to obtain the centre point.
        radius: Distance handed to ``kilometers_to_degrees``; the converted
            value bounds how far the result may lie from the centre.

    Returns:
        A ``Coordinate`` built from the centre's latitude and longitude plus
        a random offset.
    """
    # From https://gis.stackexchange.com/questions/25877/how-to-generate-random-locations-nearby-my-location
    center = geocode_address(address)
    radius_in_degrees = kilometers_to_degrees(radius)
    center_lon = center.get_lon()
    center_lat = center.get_lat()
    u = random.random()
    v = random.random()
    # Random distance (scaled by sqrt(u), as in the linked answer) and
    # random bearing around the centre.
    w = radius_in_degrees * math.sqrt(u)
    t = 2 * math.pi * v
    lon_offset = w * math.cos(t)
    lat_offset = w * math.sin(t)
    # Widen the longitude offset by 1 / cos(latitude) to account for the
    # Earth's curvature.  The latitude is in degrees (degree offsets are
    # added to it below) while math.cos expects radians, so convert first.
    lon_offset = lon_offset / math.cos(math.radians(center_lat))
    return Coordinate(center_lat + lat_offset, center_lon + lon_offset)
    def testLocations(self):
        """Check the bins and averaged locations produced by locations()."""
        repy = rep.representatives(self.data, self.labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()
        placed = 0
        for members in repy.bins:
            # Every pair of points sharing a bin must be less than 300 apart.
            for pos, entry in enumerate(members):
                if entry[0] == 'start':
                    here = repy.reps[entry[1]].trip_start_location
                if entry[0] == 'end':
                    here = repy.reps[entry[1]].trip_end_location
                for earlier in members[:pos]:
                    if earlier[0] == 'start':
                        there = repy.reps[earlier[1]].trip_start_location
                    if earlier[0] == 'end':
                        there = repy.reps[earlier[1]].trip_end_location
                    self.assertTrue(
                        repy.distance(here.lat, here.lon,
                                      there.lat, there.lon) < 300)
            placed += len(members)
        # Each cluster contributes one start and one end, each placed once.
        self.assertTrue(placed == 2 * repy.num_clusters)
        for cluster_idx in range(repy.num_clusters):
            for kind in ('start', 'end'):
                occurrences = sum(members.count((kind, cluster_idx))
                                  for members in repy.bins)
                self.assertTrue(occurrences == 1)
        self.assertTrue(len(repy.locs) == len(repy.bins))

        def bare_trip(start, end):
            return Trip(None, None, None, None, None, None, start, end)

        # Three single-trip clusters whose end points pair up almost exactly.
        data = [
            bare_trip(Coordinate(1, 2), Coordinate(30, 40)),
            bare_trip(Coordinate(1.0000002, 2.0000002),
                      Coordinate(55.0000002, 85.0000002)),
            bare_trip(Coordinate(30.0000002, 40.0000002), Coordinate(55, 85)),
        ]
        repy = rep.representatives(data, [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()
        repy.locations()
        expected_bins = [
            [('start', 0), ('start', 1)],
            [('end', 0), ('start', 2)],
            [('end', 1), ('end', 2)],
        ]
        for idx, expected in enumerate(expected_bins):
            self.assertTrue(repy.bins[idx] == expected)
        # Each location is the mean of the two points sharing its bin.
        expected_locs = [
            (1.0000001, 2.0000001),
            (30.0000001, 40.0000001),
            (55.0000001, 85.0000001),
        ]
        for idx, (lat, lon) in enumerate(expected_locs):
            self.assertTrue(round(repy.locs[idx].lat, 7) == lat)
            self.assertTrue(round(repy.locs[idx].lon, 7) == lon)
Exemplo n.º 10
0
    def locations(self):
        """Group cluster start/end points into bins and average each bin.

        Sets ``self.bins`` to a list of bins, each a list of
        ``('start', i)`` / ``('end', i)`` tags for cluster index ``i``;
        ``self.num_locations`` to the number of bins; and ``self.locs`` to
        one ``Coordinate`` per bin holding the mean lat/lon of the tagged
        points in ``self.reps``.  With empty ``self.data`` all three end up
        empty / zero.  Whether a point joins an existing bin is decided by
        ``self.match``.
        """
        self.bins = []
        self.locs = []
        if not self.data:
            self.num_locations = 0
            return
        for cluster_idx in range(self.num_clusters):
            start_tag = ('start', cluster_idx)
            end_tag = ('end', cluster_idx)
            start_placed = False
            end_placed = False
            # Try to place each end of the cluster in an existing bin; a
            # point is placed in at most one bin.
            for members in self.bins:
                if self.match('start', cluster_idx, members) and not start_placed:
                    members.append(start_tag)
                    start_placed = True
                if self.match('end', cluster_idx, members) and not end_placed:
                    members.append(end_tag)
                    end_placed = True
            if not start_placed:
                # Open a new bin for the start; the end may join it.
                fresh_bin = [start_tag]
                if self.match('end', cluster_idx, fresh_bin) and not end_placed:
                    fresh_bin.append(end_tag)
                    end_placed = True
                self.bins.append(fresh_bin)
            if not end_placed:
                self.bins.append([end_tag])

        self.num_locations = len(self.bins)

        self.locs = []
        for members in self.bins:
            latlons = []
            for tag in members:
                if tag[0] == 'start':
                    point = self.reps[tag[1]].trip_start_location
                if tag[0] == 'end':
                    point = self.reps[tag[1]].trip_end_location
                latlons.append([point.lat, point.lon])
            # Column-wise mean gives [mean lat, mean lon] for the bin.
            center = numpy.mean(latlons, axis=0)
            self.locs.append(Coordinate(center[0], center[1]))
Exemplo n.º 11
0
    def testMatch(self):
        """Check match() against hand-built bins of three single-trip clusters."""
        def bare_trip(start, end):
            return Trip(None, None, None, None, None, None, start, end)

        data = [
            bare_trip(Coordinate(1, 2), Coordinate(3, 4)),
            bare_trip(Coordinate(3, 4), Coordinate(1, 2)),
            bare_trip(Coordinate(1, 2), Coordinate(9, 10)),
        ]
        repy = rep.representatives(data, [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()

        # End of trip 1 is (1, 2), the same point as the starts of trips 0
        # and 2.
        self.assertTrue(repy.match('end', 1, [('start', 0), ('start', 2)]))
        # The bin also holds the end of trip 0 at (3, 4).
        self.assertTrue(not repy.match('end', 1, [('start', 0), ('end', 0)]))
        # End of trip 2 is (9, 10), unlike either point in the bin.
        self.assertTrue(not repy.match('end', 2, [('start', 0), ('start', 1)]))
Exemplo n.º 12
0
 def testReps(self):
     """get_reps yields one rep per cluster, located at the cluster mean."""
     repy = rep.representatives(self.data, self.labels)
     repy.list_clusters()
     repy.get_reps()
     self.assertTrue(len(repy.reps) == len(repy.clusters))

     def bare_trip(start, end):
         return Trip(None, None, None, None, None, None, start, end)

     # Three trips sharing label 0: the starts average to (5, 6) and the
     # ends to (4, 6).
     data = [
         bare_trip(Coordinate(1, 2), Coordinate(3, 4)),
         bare_trip(Coordinate(9, 10), Coordinate(5, 8)),
         bare_trip(Coordinate(5, 6), Coordinate(4, 6)),
     ]
     repy = rep.representatives(data, [0, 0, 0])
     repy.list_clusters()
     repy.get_reps()
     representative = repy.reps[0]
     self.assertTrue(representative.trip_start_location.lat == 5)
     self.assertTrue(representative.trip_start_location.lon == 6)
     self.assertTrue(representative.trip_end_location.lat == 4)
     self.assertTrue(representative.trip_end_location.lon == 6)
Exemplo n.º 13
0
def _do_google_geo(address):
    """Geocode ``address`` with pyGeo and return the first hit as a Coordinate.

    The Coordinate is built from the first two entries of the first
    result's ``coordinates``.
    """
    geocoder = pyGeo(GOOGLE_MAPS_KEY)
    top_hit = geocoder.geocode(address)[0]
    coords = top_hit.coordinates
    return Coordinate(coords[0], coords[1])
Exemplo n.º 14
0
    def turn_into_trip(self, _id, user_id, trip_id, is_fake=False, itinerary=0):
        """Convert one itinerary of the routing response into a trip object.

        Args:
            _id: Identifier passed through to the returned trip.
            user_id: User identifier passed to every section and the trip.
            trip_id: Trip identifier passed to every section and the trip.
            is_fake: When true a plain ``Trip`` is returned; otherwise an
                ``Alternative_Trip`` that also carries the cost and the
                list of modes used.
            itinerary: Index into ``plan["itineraries"]`` of the itinerary
                to convert.

        Returns:
            A ``Trip`` or ``Alternative_Trip`` with one ``Section`` per leg
            of the selected itinerary.

        Raises:
            PathNotFoundException: If the response has no "plan" entry.
        """
        our_json = self.get_json()
        if "plan" not in our_json:
            print("While querying alternatives from %s to %s" % (self.start_point, self.end_point))
            print("query URL is %s" % self.make_url())
            print("Response %s does not have a plan " % our_json)
            raise PathNotFoundException(our_json['debugOutput'])

        plan = our_json["plan"]
        # Legs, start/end times and fare must all come from the same
        # itinerary, so look the selected one up once and use it throughout.
        chosen = plan["itineraries"][itinerary]

        sections = []
        mode_list = set()
        for leg in chosen['legs']:
            start_time = otp_time_to_ours(leg["startTime"])
            end_time = otp_time_to_ours(leg["endTime"])
            distance = float(leg['distance'])
            start_loc = Coordinate(float(leg["from"]["lat"]), float(leg["from"]["lon"]))
            end_loc = Coordinate(float(leg["to"]["lat"]), float(leg["to"]["lon"]))
            mode = leg["mode"]
            mode_list.add(mode)
            # Sections get a random placeholder id.
            fake_id = random.random()
            # NOTE(review): only leg['steps'] feeds the section's points;
            # leg['intermediateStops'] is not used -- confirm that is
            # intended for RAIL/SUBWAY legs.
            points = [Coordinate(step["lat"], step['lon']) for step in leg['steps']]
            section = Section(str(fake_id), user_id, trip_id, distance, "move", start_time, end_time, start_loc, end_loc, mode, mode, points)
            sections.append(section)

        # Function-call form works on both Python 2 and Python 3.
        print("len(sections) = %s" % len(sections))
        final_start_loc = Coordinate(float(plan["from"]["lat"]), float(plan["from"]["lon"]))
        final_end_loc = Coordinate(float(plan["to"]["lat"]), float(plan["to"]["lon"]))
        final_start_time = otp_time_to_ours(chosen["startTime"])
        final_end_time = otp_time_to_ours(chosen["endTime"])

        cost = 0
        if "RAIL" in mode_list or "SUBWAY" in mode_list:
            try:
                # The fare is reported in cents.
                cost = float(chosen['fare']['fare']['regular']['cents']) / 100.0
            except (KeyError, IndexError, TypeError, ValueError):
                # Missing or malformed fare information: treat as free.
                cost = 0
        # TODO calculate car cost; trips using CAR currently keep cost 0.

        mode_list = list(mode_list)
        if is_fake:
            return Trip(_id, user_id, trip_id, sections, final_start_time, final_end_time, final_start_loc, final_end_loc)
        return Alternative_Trip(_id, user_id, trip_id, sections, final_start_time, final_end_time, final_start_loc, final_end_loc, 0, cost, mode_list)