示例#1
0
    def testBinData(self):
        """Check how bin_data() distributes trips over bins.

        Three scenarios are exercised:

        * a similarity object built from an empty list has no bins;
        * after binning ``self.data``, the bin sizes add up to
          ``len(sim.data)``, the bins hold that many distinct entries, and
          the bins are ordered from largest to smallest;
        * twenty trips forming two groups with different end points end up
          in exactly two bins.
        """
        sim = similarity.similarity([], 300)
        self.assertFalse(sim.bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()
        # The bin sizes must add up to the number of trips ...
        binned_total = sum(len(members) for members in sim.bins)
        self.assertEqual(binned_total, len(sim.data))
        # ... and no entry may be shared between bins.
        distinct_entries = set()
        for members in sim.bins:
            distinct_entries.update(members)
        self.assertEqual(len(distinct_entries), len(sim.data))
        # Each bin must be at least as large as the one that follows it.
        for i in range(len(sim.bins) - 1):
            self.assertGreaterEqual(len(sim.bins[i]), len(sim.bins[i + 1]))

        now = datetime.datetime.now()
        endpoint_pairs = (
            (Coordinate(47, -122), Coordinate(47, -123)),
            (Coordinate(41, -74), Coordinate(42, -74)),
        )
        data = []
        for start, end in endpoint_pairs:
            for _ in range(10):
                data.append(Trip(None, None, None, None, now, now, start, end))
        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertEqual(len(sim.bins), 2)
示例#2
0
 def testDistance(self):
     """Check the truthiness of distance() for a near and a far point.

     For a similarity object built with 300 as its second argument,
     distance() must return a truthy value for (start, near_end) and a
     falsy value for (start, far_end).
     """
     # NOTE(review): the first argument of these Coordinates is about -122,
     # which is outside the valid latitude range, while other tests in this
     # file build Coordinate(47, -122). Confirm the argument order here is
     # intentional before relying on the absolute distances.
     start = Coordinate(-122.259447, 37.875174)
     near_end = Coordinate(-122.259279, 37.875479)
     far_end = Coordinate(-122.252287, 37.869569)
     sim = similarity.similarity(self.data, 300)
     self.assertTrue(
         sim.distance(start.lat, start.lon, near_end.lat, near_end.lon))
     self.assertFalse(
         sim.distance(start.lat, start.lon, far_end.lat, far_end.lon))
    def testLocations(self):
        """Check that locations() groups representative end points into bins.

        Part one runs on ``self.data`` / ``self.labels`` and verifies that

        * any two entries of the same bin are less than 300 apart according
          to ``repy.distance``;
        * every cluster contributes exactly one 'start' and one 'end' entry
          across all bins (so the bins hold ``2 * num_clusters`` entries);
        * there is one entry in ``repy.locs`` per bin.

        Part two uses three hand-built trips whose end points coincide in
        pairs and pins the exact bins and the rounded bin locations.
        """
        def location_of(reps, entry):
            # Bin entries are compared against ('start' | 'end', index)
            # tuples below; resolve one to the matching representative
            # location.
            tag, index = entry[0], entry[1]
            if tag == 'start':
                return reps[index].trip_start_location
            if tag == 'end':
                return reps[index].trip_end_location
            self.fail("unexpected bin entry %r" % (entry,))

        repy = rep.representatives(self.data, self.labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()
        total = 0
        for members in repy.bins:
            for i, entry in enumerate(members):
                here = location_of(repy.reps, entry)
                # Compare against every earlier entry of the same bin.
                for other in members[:i]:
                    there = location_of(repy.reps, other)
                    self.assertLess(
                        repy.distance(here.lat, here.lon,
                                      there.lat, there.lon),
                        300)
            total += len(members)
        self.assertEqual(total, 2 * repy.num_clusters)
        for i in range(repy.num_clusters):
            self.assertEqual(
                sum(members.count(('start', i)) for members in repy.bins), 1)
            self.assertEqual(
                sum(members.count(('end', i)) for members in repy.bins), 1)
        self.assertEqual(len(repy.locs), len(repy.bins))

        tripa = Trip(None, None, None, None, None, None, Coordinate(1, 2),
                     Coordinate(30, 40))
        tripb = Trip(None, None, None, None, None, None,
                     Coordinate(1.0000002, 2.0000002),
                     Coordinate(55.0000002, 85.0000002))
        tripc = Trip(None, None, None, None, None, None,
                     Coordinate(30.0000002, 40.0000002), Coordinate(55, 85))
        data = [tripa, tripb, tripc]
        labels = [0, 1, 2]
        repy = rep.representatives(data, labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()
        expected_bins = [
            [('start', 0), ('start', 1)],
            [('end', 0), ('start', 2)],
            [('end', 1), ('end', 2)],
        ]
        for index, expected in enumerate(expected_bins):
            self.assertEqual(repy.bins[index], expected)
        # Each expected location is the midpoint of the two points in a bin.
        expected_locs = [
            (1.0000001, 2.0000001),
            (30.0000001, 40.0000001),
            (55.0000001, 85.0000001),
        ]
        for index, (lat, lon) in enumerate(expected_locs):
            self.assertEqual(round(repy.locs[index].lat, 7), lat)
            self.assertEqual(round(repy.locs[index].lon, 7), lon)
def read_data(uuid=None, size=None, old=True):
    """Load trips from the database.

    :param uuid: user to load trips for. On the old-format path a falsy
        value loads the 'move' trips of all users; on the new-format path
        the value is passed to ``esda.get_entries`` unchanged.
    :param size: on the old-format path, stop once this many usable trips
        have been collected. Falsy means no limit. Ignored when ``old`` is
        false.
    :param old: when true (default) read 'move' documents from
        ``edb.get_trip_db()`` and convert them with ``Trip.trip_from_json``;
        otherwise return the result of ``esda.get_entries`` for
        ``esda.RAW_TRIP_KEY``.
    :return: for the old format, a list of ``Trip`` objects that have a
        start location, an end location and a start time; for the new
        format, whatever ``esda.get_entries`` returns.
    """
    if not old:
        logging.debug("not old")
        return esda.get_entries(esda.RAW_TRIP_KEY, uuid,
                                time_query=None, geo_query=None)

    query = {'type': 'move'}
    if uuid:
        query['user_id'] = uuid

    data = []
    for t in edb.get_trip_db().find(query):
        try:
            trip = Trip.trip_from_json(t)
        except Exception:
            # Best effort: a document that cannot be parsed is skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logging.debug("skipping trip that could not be parsed",
                          exc_info=True)
            continue
        if not (trip.trip_start_location and trip.trip_end_location
                and trip.start_time):
            continue
        data.append(trip)
        if size and len(data) == size:
            break
    return data
示例#5
0
def read_data(uuid=None, size=None, old=True):
    """Read trips either from the old trip collection or the new entries.

    :param uuid: user whose trips are wanted. For the old format, a falsy
        value means "all users"; for the new format it is forwarded to
        ``esda.get_entries`` as given.
    :param size: upper bound on the number of trips returned for the old
        format (falsy disables the bound). Not used for the new format.
    :param old: selects the old format (default) or the new one.
    :return: old format: list of ``Trip`` objects having start location,
        end location and start time. New format: the value returned by
        ``esda.get_entries(esda.RAW_TRIP_KEY, ...)``.
    """
    if not old:
        logging.debug("not old")
        trips = esda.get_entries(esda.RAW_TRIP_KEY,
                                 uuid,
                                 time_query=None,
                                 geo_query=None)
        return trips

    trip_db = edb.get_trip_db()
    if uuid:
        trips = trip_db.find({'user_id': uuid, 'type': 'move'})
    else:
        trips = trip_db.find({'type': 'move'})

    data = []
    for t in trips:
        try:
            trip = Trip.trip_from_json(t)
        except Exception:
            # Unparseable documents are skipped on purpose; only real
            # errors are caught, so interpreter-exit signals still
            # propagate.
            logging.debug("skipping trip that could not be parsed",
                          exc_info=True)
            continue
        has_required_fields = (trip.trip_start_location
                               and trip.trip_end_location
                               and trip.start_time)
        if not has_required_fields:
            continue
        data.append(trip)
        if size and len(data) == size:
            break
    return data
def read_data(uuid=None, size=None, old=True):
    """Load trips from the old or the new trip collection.

    :param uuid: user to load trips for. In the old format a falsy value
        loads the 'move' trips of every user; in the new format the query
        is always ``{"user_id": uuid}``.
    :param size: old format only: stop after this many usable trips (falsy
        means unlimited).
    :param old: when true (default) read ``edb.get_trip_db()`` and build
        ``Trip`` objects; otherwise read ``edb.get_trip_new_db()`` and wrap
        every document in ``ecwt.Trip``.
    :return: list of trip objects of the type matching ``old``.
    """
    if not old:
        new_trips = edb.get_trip_new_db().find({"user_id": uuid})
        return [ecwt.Trip(trip) for trip in new_trips]

    query = {'type': 'move'}
    if uuid:
        query['user_id'] = uuid

    data = []
    for t in edb.get_trip_db().find(query):
        try:
            trip = Trip.trip_from_json(t)
        except Exception:
            # Deliberately best effort: skip documents that cannot be
            # converted, without also swallowing KeyboardInterrupt or
            # SystemExit.
            continue
        if not (trip.trip_start_location and trip.trip_end_location
                and trip.start_time):
            continue
        data.append(trip)
        if size and len(data) == size:
            break
    return data
 def get_reps(self):
     """Build one representative trip per cluster.

     Resets ``self.reps`` to an empty list. If ``self.data`` is empty nothing
     else happens; otherwise, for every cluster in ``self.clusters``, a
     ``Trip`` is appended whose start and end coordinates are the means of
     the start and end coordinates of the cluster's members.
     """
     self.reps = []
     if not self.data:
         return
     for members in self.clusters:
         start_lats, start_lons, end_lats, end_lons = [], [], [], []
         columns = (start_lats, start_lons, end_lats, end_lons)
         for member in members:
             if self.is_old:
                 origin = member.trip_start_location
                 destination = member.trip_end_location
                 values = (origin.lat, origin.lon,
                           destination.lat, destination.lon)
             else:
                 # New-style trips store geojson coordinates, i.e.
                 # (lon, lat); swap the indices so that the columns stay
                 # (lat, lon) like in the old format.
                 origin = member.data.start_loc["coordinates"]
                 destination = member.data.end_loc["coordinates"]
                 values = (origin[1], origin[0],
                           destination[1], destination[0])
             for column, value in zip(columns, values):
                 column.append(value)
         # One mean per column: start lat, start lon, end lat, end lon.
         means = numpy.mean(
             [start_lats, start_lons, end_lats, end_lons], axis=1)
         representative = Trip(None, None, None, None, None, None,
                               Coordinate(means[0], means[1]),
                               Coordinate(means[2], means[3]))
         self.reps.append(representative)
示例#8
0
    def testInit(self):
        """Check construction of similarity objects.

        * Passing the string ``'b'`` as the second argument must raise
          ``ValueError``.
        * An empty input list results in empty ``data``.
        * Adding a trip whose start and end are the same coordinate to the
          input must not change the resulting ``data``.
        """
        # assertRaises also fails the test when no exception is raised at
        # all, which a bare try/except would let pass unnoticed.
        with self.assertRaises(ValueError):
            similarity.similarity([], 'b')

        sim = similarity.similarity([], 100)
        self.assertEqual(len(sim.data), 0)

        now = datetime.datetime.now()
        start = Coordinate(47, -122)
        end = Coordinate(47, -123)
        zero_length_trip = Trip(None, None, None, None, now, now, start, start)
        regular_trip = Trip(None, None, None, None, now, now, start, end)
        sim = similarity.similarity([zero_length_trip, regular_trip], 100)
        simmy = similarity.similarity([regular_trip], 100)
        self.assertEqual(sim.data, simmy.data)
示例#9
0
 def testElbowDistance(self):
     """Check the cut-off chosen by delete_bins() for a fixed bin layout.

     Eleven references to one trip are used as data and the bins are set by
     hand to sizes 4, 3, 1, 1, 1, 1; after delete_bins() ``sim.num`` must
     be 2.
     """
     start = Coordinate(47, -122)
     end = Coordinate(47, -123)
     now = datetime.datetime.now()
     trip = Trip(None, None, None, None, now, now, start, end)
     sim = similarity.similarity([trip] * 11, 300)
     # Bypass bin_data(): assign the bins directly so the input to
     # delete_bins() is fully controlled.
     sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim.delete_bins()
     self.assertEqual(sim.num, 2)
    def testMatch(self):
        """Check match() against three hand-built bins.

        With one trip per cluster, the representatives are built from trips
        a: (1, 2) -> (3, 4), b: (3, 4) -> (1, 2) and c: (1, 2) -> (9, 10).

        * the end of trip 1 must match a bin holding the starts of trips 0
          and 2;
        * it must not match a bin holding the start and the end of trip 0;
        * the end of trip 2 must not match a bin holding the starts of
          trips 0 and 1.
        """
        tripa = Trip(None, None, None, None, None, None, Coordinate(1, 2),
                     Coordinate(3, 4))
        tripb = Trip(None, None, None, None, None, None, Coordinate(3, 4),
                     Coordinate(1, 2))
        tripc = Trip(None, None, None, None, None, None, Coordinate(1, 2),
                     Coordinate(9, 10))

        repy = rep.representatives([tripa, tripb, tripc], [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()

        self.assertTrue(
            repy.match('end', 1, [('start', 0), ('start', 2)]))
        self.assertFalse(
            repy.match('end', 1, [('start', 0), ('end', 0)]))
        self.assertFalse(
            repy.match('end', 2, [('start', 0), ('start', 1)]))
 def testReps(self):
     """Check that get_reps() yields one representative per cluster.

     For ``self.data`` the number of representatives must equal the number
     of clusters. For three hand-built trips sharing label 0, the single
     representative must start at (5, 6) and end at (4, 6), i.e. at the
     arithmetic means of the three start and end coordinates.
     """
     repy = rep.representatives(self.data, self.labels)
     repy.list_clusters()
     repy.get_reps()
     self.assertEqual(len(repy.reps), len(repy.clusters))

     tripa = Trip(None, None, None, None, None, None, Coordinate(1, 2),
                  Coordinate(3, 4))
     tripb = Trip(None, None, None, None, None, None, Coordinate(9, 10),
                  Coordinate(5, 8))
     tripc = Trip(None, None, None, None, None, None, Coordinate(5, 6),
                  Coordinate(4, 6))
     repy = rep.representatives([tripa, tripb, tripc], [0, 0, 0])
     repy.list_clusters()
     repy.get_reps()
     representative = repy.reps[0]
     self.assertEqual(representative.trip_start_location.lat, 5)
     self.assertEqual(representative.trip_start_location.lon, 6)
     self.assertEqual(representative.trip_end_location.lat, 4)
     self.assertEqual(representative.trip_end_location.lon, 6)
    def testCluster(self):
        """Check featurization.cluster() for consistency and edge cases.

        * After clustering there is one label per point, and
          ``feat.clusters`` equals the number of distinct labels.
        * An unknown ``name`` must give the same result as 'kmeans'.
        * ``min_clusters`` larger than the data size must be accepted.
        * ``min_clusters=0`` must give the same result as
          ``min_clusters=2``.
        * ``min_clusters`` greater than ``max_clusters`` must raise
          ``ValueError``.
        * Twenty trips forming two groups with different end points must
          receive exactly two distinct labels.
        """
        feat = featurization.featurization(self.data)
        feat.cluster(min_clusters=2, max_clusters=10)
        self.assertEqual(len(feat.labels), len(feat.points))
        self.assertEqual(feat.clusters, len(set(feat.labels)))

        kmeans_result = feat.cluster(name='kmeans', min_clusters=5,
                                     max_clusters=20)
        self.assertEqual(len(feat.labels), len(feat.points))
        self.assertEqual(feat.clusters, len(set(feat.labels)))
        # An invalid clustering method is expected to default to kmeans.
        fallback_result = feat.cluster(name='nonname', min_clusters=5,
                                       max_clusters=20)
        self.assertEqual(kmeans_result, fallback_result)

        # Only checks that asking for more clusters than trips does not
        # raise.
        feat.cluster(min_clusters=len(self.data) + 1)

        from_zero = feat.cluster(min_clusters=0, max_clusters=20)
        from_two = feat.cluster(min_clusters=2, max_clusters=20)
        self.assertEqual(from_zero, from_two)

        # assertRaises also fails the test when nothing is raised.
        with self.assertRaises(ValueError):
            feat.cluster(min_clusters=10, max_clusters=2)

        def make_trips(start, end, count=10):
            # Every trip gets its own timestamp.
            trips = []
            for _ in range(count):
                now = datetime.datetime.now()
                trips.append(
                    Trip(None, None, None, None, now, now, start, end))
            return trips

        data = make_trips(Coordinate(47, -122), Coordinate(47, -123))
        data += make_trips(Coordinate(41, -74), Coordinate(42, -74))
        feat = featurization.featurization(data)
        feat.cluster()
        self.assertEqual(len(set(feat.labels)), 2)
 def testCalculatePoints(self):
     """Check the points computed when a featurization object is built.

     * An empty list or ``None`` as input leaves ``feat.data`` falsy.
     * A trip without start and end locations must raise
       ``AttributeError`` on construction.
     * For ``self.data`` there is one point per trip and no point contains
       ``None``.
     """
     feat = featurization.featurization([])
     self.assertFalse(feat.data)
     feat = featurization.featurization(None)
     self.assertFalse(feat.data)

     empty_trip = Trip(None, None, None, None, None, None, None, None)
     # assertRaises also fails the test when nothing is raised.
     with self.assertRaises(AttributeError):
         featurization.featurization([empty_trip])

     feat = featurization.featurization(self.data)
     self.assertEqual(len(feat.points), len(feat.data))
     for point in feat.points:
         self.assertNotIn(None, point)
def read_data(uuid=None, size=None):
    """Load 'move' trips from the trip database.

    :param uuid: when truthy, only this user's trips are loaded; otherwise
        the trips of all users.
    :param size: stop once this many usable trips have been collected
        (falsy means no limit).
    :return: list of ``Trip`` objects that have a start location, an end
        location and a start time. Empty when nothing matches.
    """
    query = {'type': 'move'}
    if uuid:
        query['user_id'] = uuid

    # No up-front count of the result set: iterating an empty result
    # already yields an empty list, and counting costs an extra query.
    # NOTE(review): if this is a PyMongo cursor, Cursor.count() no longer
    # exists in PyMongo 4 - confirm.
    data = []
    for t in edb.get_trip_db().find(query):
        try:
            trip = Trip.trip_from_json(t)
        except Exception:
            # Best effort: skip documents that cannot be converted, while
            # letting KeyboardInterrupt / SystemExit propagate.
            continue
        if not (trip.trip_start_location and trip.trip_end_location
                and trip.start_time):
            continue
        data.append(trip)
        if size and len(data) == size:
            break
    return data
def read_data(uuid=None, size=None, old=True):
    """Read trips from the old-format or the new-format collection.

    :param uuid: user whose trips are wanted. Old format: a falsy value
        selects the 'move' trips of all users. New format: the query is
        always ``{"user_id": uuid}``.
    :param size: old format only: maximum number of trips to return (falsy
        means no maximum).
    :param old: true (default) reads ``edb.get_trip_db()`` and returns
        ``Trip`` objects; false reads ``edb.get_trip_new_db()`` and returns
        ``ecwt.Trip`` wrappers.
    :return: list of trip objects of the type matching ``old``.
    """
    if not old:
        trip_db = edb.get_trip_new_db()
        return [ecwt.Trip(trip) for trip in trip_db.find({"user_id": uuid})]

    trip_db = edb.get_trip_db()
    if uuid:
        trips = trip_db.find({'user_id': uuid, 'type': 'move'})
    else:
        trips = trip_db.find({'type': 'move'})

    data = []
    for t in trips:
        try:
            trip = Trip.trip_from_json(t)
        except Exception:
            # Unparseable documents are skipped on purpose; only ordinary
            # errors are caught so that interpreter-exit signals still
            # propagate.
            continue
        usable = (trip.trip_start_location and trip.trip_end_location
                  and trip.start_time)
        if not usable:
            continue
        data.append(trip)
        if size and len(data) == size:
            break
    return data
示例#16
0
    def turn_into_trip(self, _id, user_id, trip_id, is_fake=False, itinerary=0):
        """Convert one itinerary of the routing response into a trip object.

        The response is obtained from ``self.get_json()``. Every leg of the
        selected itinerary becomes a ``Section``; the overall start and end
        locations come from the plan and the overall times from the
        selected itinerary.

        :param _id: id passed through to the returned trip.
        :param user_id: user id stored on the trip and on every section.
        :param trip_id: trip id stored on the trip and on every section.
        :param is_fake: when true return a plain ``Trip``; otherwise an
            ``Alternative_Trip`` that also carries the cost and the list of
            modes.
        :param itinerary: index into the plan's ``itineraries`` list.
        :return: ``Trip`` or ``Alternative_Trip`` (see ``is_fake``).
        :raises PathNotFoundException: when the response has no "plan".
        """
        our_json = self.get_json()
        if "plan" not in our_json:
            print("While querying alternatives from %s to %s" % (self.start_point, self.end_point))
            print("query URL is %s" % self.make_url())
            print("Response %s does not have a plan " % our_json)
            # .get() so that a response lacking "debugOutput" still reports
            # the missing plan instead of failing with a KeyError.
            raise PathNotFoundException(our_json.get('debugOutput'))

        plan = our_json["plan"]
        chosen = plan["itineraries"][itinerary]

        sections = []
        mode_list = set()
        for leg in chosen['legs']:
            start_time = otp_time_to_ours(leg["startTime"])
            end_time = otp_time_to_ours(leg["endTime"])
            distance = float(leg['distance'])
            start_loc = Coordinate(float(leg["from"]["lat"]), float(leg["from"]["lon"]))
            end_loc = Coordinate(float(leg["to"]["lat"]), float(leg["to"]["lon"]))
            mode = leg["mode"]
            mode_list.add(mode)
            # Sections built here get a random placeholder id.
            fake_id = random.random()
            points = [Coordinate(step["lat"], step['lon'])
                      for step in leg['steps']]
            sections.append(
                Section(str(fake_id), user_id, trip_id, distance, "move",
                        start_time, end_time, start_loc, end_loc,
                        mode, mode, points))

        print("len(sections) = %s" % len(sections))
        final_start_loc = Coordinate(float(plan["from"]["lat"]), float(plan["from"]["lon"]))
        final_end_loc = Coordinate(float(plan["to"]["lat"]), float(plan["to"]["lon"]))
        # Times and fare are read from the same itinerary as the legs, so
        # that a non-zero ``itinerary`` gives a consistent trip.
        final_start_time = otp_time_to_ours(chosen["startTime"])
        final_end_time = otp_time_to_ours(chosen["endTime"])

        cost = 0
        if "RAIL" in mode_list or "SUBWAY" in mode_list:
            try:
                # The fare is given in cents; divide by 100 for the cost.
                cost = float(chosen['fare']['fare']['regular']['cents']) / 100.0
            except (KeyError, TypeError, ValueError):
                # No usable fare in the response: keep the cost at 0.
                cost = 0
        # TODO calculate car cost (trips containing a CAR leg currently
        # cost 0)

        mode_list = list(mode_list)
        if is_fake:
            return Trip(_id, user_id, trip_id, sections, final_start_time,
                        final_end_time, final_start_loc, final_end_loc)
        return Alternative_Trip(_id, user_id, trip_id, sections,
                                final_start_time, final_end_time,
                                final_start_loc, final_end_loc,
                                0, cost, mode_list)