Пример #1
0
    def testBinData(self):
        """Exercise ``similarity.bin_data`` on empty, fixture and synthetic data.

        Asserts that a ``similarity`` built over no data has falsy ``bins``;
        that after binning ``self.data`` the bin sizes add up to
        ``len(sim.data)``, the bins hold that many distinct members, and the
        bins come in non-increasing size order; and that twenty trips spread
        over two distinct start/end pairs end up in exactly two bins.
        """
        empty_sim = similarity.similarity([], 300)
        self.assertTrue(not empty_sim.bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        # The bin sizes account for every data point ...
        binned_total = sum(len(group) for group in sim.bins)
        self.assertTrue(binned_total == len(sim.data))

        # ... and, together with the check above, no member is binned twice.
        distinct_members = {member for group in sim.bins for member in group}
        self.assertTrue(len(distinct_members) == len(sim.data))

        # Each bin is at least as large as the one that follows it.
        for pos in range(1, len(sim.bins)):
            self.assertTrue(len(sim.bins[pos - 1]) >= len(sim.bins[pos]))

        now = time.time()
        first_start, first_end = [-122, 47], [-123, 47]
        second_start, second_end = [-74, 41], [-74, 42]
        trips = [
            etatc._createTripEntry(self, now, now, first_start, first_end)
            for _ in range(10)
        ]
        trips.extend(
            etatc._createTripEntry(self, now, now, second_start, second_end)
            for _ in range(10)
        )
        sim = similarity.similarity(trips, 300)
        sim.bin_data()
        self.assertTrue(len(sim.bins) == 2)
Пример #2
0
    def testBinData(self):
        """Verify the bins produced by ``similarity.bin_data``.

        Three scenarios are checked in turn: no data (``bins`` is falsy
        without binning), the fixture ``self.data`` (bin sizes sum to the
        data size, the bins contain that many distinct members, sizes never
        increase from one bin to the next), and two groups of ten identical
        trips (exactly two bins expected).
        """
        self.assertTrue(not similarity.similarity([], 300).bins)

        sim = similarity.similarity(self.data, 300)
        sim.bin_data()

        sizes = [len(group) for group in sim.bins]
        self.assertTrue(sum(sizes) == len(sim.data))

        seen = set()
        for group in sim.bins:
            seen.update(group)
        self.assertTrue(len(seen) == len(sim.data))

        # Compare each bin size with its successor.
        for current_size, next_size in zip(sizes, sizes[1:]):
            self.assertTrue(current_size >= next_size)

        now = time.time()
        endpoints = [([-122, 47], [-123, 47]), ([-74, 41], [-74, 42])]
        data = []
        for start, end in endpoints:
            for _ in range(10):
                data.append(
                    etatc._createTripEntry(self, now, now, start, end))
        sim = similarity.similarity(data, 300)
        sim.bin_data()
        self.assertTrue(len(sim.bins) == 2)
Пример #3
0
 def testDistance(self):
     """Check ``similarity.distance`` for a nearby and a farther end point.

     With a ``similarity`` built using 300 as its second argument, the call
     is expected to be truthy for ``near_end`` and falsy for ``far_end``.
     Each point is passed as element 1 followed by element 0 (presumably
     [lon, lat] pairs handed over as lat, lon -- confirm against
     ``distance``).
     """
     origin = [-122.259447, 37.875174]
     near_end = [-122.259279, 37.875479]
     far_end = [-122.252287, 37.869569]
     now = time.time()
     # These entries are created but not used by the assertions below.
     etatc._createTripEntry(self, now, now, origin, near_end)
     etatc._createTripEntry(self, now, now, origin, far_end)
     sim = similarity.similarity(self.data, 300)
     self.assertTrue(
         sim.distance(origin[1], origin[0], near_end[1], near_end[0]))
     self.assertTrue(
         not sim.distance(origin[1], origin[0], far_end[1], far_end[0]))
Пример #4
0
 def testDistance(self):
     """Verify the truthiness of ``similarity.distance`` for two point pairs.

     The first end point must give a truthy result and the second a falsy
     one, using a ``similarity`` constructed with 300 as its second
     argument. Coordinates are passed index 1 first, then index 0.
     """
     now = time.time()
     start = [-122.259447, 37.875174]
     close_end = [-122.259279, 37.875479]
     distant_end = [-122.252287, 37.869569]
     # Trip entries are built here although no assertion reads them.
     close_trip = etatc._createTripEntry(self, now, now, start, close_end)
     distant_trip = etatc._createTripEntry(self, now, now, start,
                                           distant_end)
     sim = similarity.similarity(self.data, 300)

     close_result = sim.distance(start[1], start[0],
                                 close_end[1], close_end[0])
     self.assertTrue(close_result)

     distant_result = sim.distance(start[1], start[0],
                                   distant_end[1], distant_end[0])
     self.assertTrue(not distant_result)
Пример #5
0
    def testLocations(self):
        """Check the bins and locations computed by ``representatives.locations``.

        On the fixture data: every pair of locations sharing a bin must be
        less than 300 apart according to ``repy.distance``; the bins hold
        ``2 * num_clusters`` entries in total; each ``('start', i)`` and
        ``('end', i)`` tag appears exactly once across all bins; and
        ``locs`` has one entry per bin.

        On three hand-built trips (one per cluster): the exact bin contents
        are asserted, and each ``locs`` entry is compared to 7 decimal
        places with the midpoint of the two nearly identical points
        in its bin.
        """
        repy = rep.representatives(self.data, self.labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()

        total = 0
        for members in repy.bins:
            for pos, current in enumerate(members):
                # Resolve the tag to the matching representative location.
                if current[0] == 'start':
                    loc_a = repy.reps[current[1]].data.start_loc
                elif current[0] == 'end':
                    loc_a = repy.reps[current[1]].data.end_loc
                # Compare against every member that precedes it in the bin.
                for earlier in members[:pos]:
                    if earlier[0] == 'start':
                        loc_b = repy.reps[earlier[1]].data.start_loc
                    elif earlier[0] == 'end':
                        loc_b = repy.reps[earlier[1]].data.end_loc
                    separation = repy.distance(
                        loc_a.coordinates[1], loc_a.coordinates[0],
                        loc_b.coordinates[1], loc_b.coordinates[0])
                    self.assertTrue(separation < 300)
            total += len(members)
        self.assertTrue(total == 2 * repy.num_clusters)

        for cluster in range(repy.num_clusters):
            start_hits = sum(
                group.count(('start', cluster)) for group in repy.bins)
            self.assertTrue(start_hits == 1)
            end_hits = sum(
                group.count(('end', cluster)) for group in repy.bins)
            self.assertTrue(end_hits == 1)
        self.assertTrue(len(repy.locs) == len(repy.bins))

        now = time.time()
        tripa = etatc._createTripEntry(self, now, now, [1, 2], [30, 40])
        tripb = etatc._createTripEntry(self, now, now, [1.0000002, 2.0000002],
                                       [55.0000002, 85.0000002])
        tripc = etatc._createTripEntry(self, now, now,
                                       [30.0000002, 40.0000002], [55, 85])
        repy = rep.representatives([tripa, tripb, tripc], [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()
        repy.locations()

        expected_bins = [
            [('start', 0), ('start', 1)],
            [('end', 0), ('start', 2)],
            [('end', 1), ('end', 2)],
        ]
        for idx, expected in enumerate(expected_bins):
            self.assertEqual(repy.bins[idx], expected)

        expected_locs = [
            (1.0000001, 2.0000001),
            (30.0000001, 40.0000001),
            (55.0000001, 85.0000001),
        ]
        for idx, (first, second) in enumerate(expected_locs):
            self.assertAlmostEqual(repy.locs[idx][0], first, places=7)
            self.assertAlmostEqual(repy.locs[idx][1], second, places=7)
    def testLocations(self):
        """Verify ``representatives.locations`` on fixture and synthetic trips.

        Fixture checks: locations that share a bin are pairwise less than
        300 apart (per ``repy.distance``), the bins contain
        ``2 * num_clusters`` tags overall, every start/end tag of every
        cluster occurs in exactly one bin, and ``locs`` is as long as
        ``bins``.

        Synthetic checks: three single-trip clusters whose end points almost
        coincide pairwise must produce three specific bins, with ``locs``
        matching the expected coordinates to 7 decimal places.
        """
        repy = rep.representatives(self.data, self.labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()

        tag_count = 0
        for group in repy.bins:
            tag_count += len(group)
            for i, tag in enumerate(group):
                kind, cluster = tag[0], tag[1]
                if kind == 'start':
                    here = repy.reps[cluster].data.start_loc
                elif kind == 'end':
                    here = repy.reps[cluster].data.end_loc
                for other_tag in group[:i]:
                    other_kind, other_cluster = other_tag[0], other_tag[1]
                    if other_kind == 'start':
                        there = repy.reps[other_cluster].data.start_loc
                    elif other_kind == 'end':
                        there = repy.reps[other_cluster].data.end_loc
                    # Members of one bin must lie within 300 of each other.
                    self.assertTrue(
                        repy.distance(here.coordinates[1],
                                      here.coordinates[0],
                                      there.coordinates[1],
                                      there.coordinates[0]) < 300)
        self.assertTrue(tag_count == 2 * repy.num_clusters)

        for i in range(repy.num_clusters):
            for kind in ('start', 'end'):
                occurrences = sum(g.count((kind, i)) for g in repy.bins)
                self.assertTrue(occurrences == 1)
        self.assertTrue(len(repy.locs) == len(repy.bins))

        now = time.time()
        tripa = etatc._createTripEntry(self, now, now, [1, 2], [30, 40])
        tripb = etatc._createTripEntry(
            self, now, now, [1.0000002, 2.0000002], [55.0000002, 85.0000002])
        tripc = etatc._createTripEntry(
            self, now, now, [30.0000002, 40.0000002], [55, 85])
        data = [tripa, tripb, tripc]
        labels = [0, 1, 2]
        repy = rep.representatives(data, labels)
        repy.list_clusters()
        repy.get_reps()
        repy.locations()

        self.assertEqual(repy.bins[0], [('start', 0), ('start', 1)])
        self.assertEqual(repy.bins[1], [('end', 0), ('start', 2)])
        self.assertEqual(repy.bins[2], [('end', 1), ('end', 2)])

        expected = [1.0000001, 2.0000001,
                    30.0000001, 40.0000001,
                    55.0000001, 85.0000001]
        # Walk locs row by row, two coordinates per row.
        for flat_index, value in enumerate(expected):
            row, col = divmod(flat_index, 2)
            self.assertAlmostEqual(repy.locs[row][col], value, places=7)
    def testMatch(self):
        """Check ``representatives.match`` against three hand-built bins.

        Trips: a = [1,2]->[3,4], b = [3,4]->[1,2], c = [1,2]->[9,10], one
        cluster each. The end of cluster 1 ([1,2]) is expected to match a bin
        holding the starts of clusters 0 and 2 (both [1,2]); it is expected
        not to match a bin that also holds the end of cluster 0 ([3,4]); and
        the end of cluster 2 ([9,10]) is expected not to match a bin of the
        starts of clusters 0 and 1.
        """
        now = time.time()
        endpoints = [([1, 2], [3, 4]), ([3, 4], [1, 2]), ([1, 2], [9, 10])]
        trips = [
            etatc._createTripEntry(self, now, now, start, end)
            for start, end in endpoints
        ]

        repy = rep.representatives(trips, [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()

        same_place = [('start', 0), ('start', 2)]
        self.assertTrue(repy.match('end', 1, same_place))

        mixed_places = [('start', 0), ('end', 0)]
        self.assertTrue(not repy.match('end', 1, mixed_places))

        other_places = [('start', 0), ('start', 1)]
        self.assertTrue(not repy.match('end', 2, other_places))
Пример #8
0
    def testMatch(self):
        """Verify which (kind, cluster) locations ``match`` accepts into a bin.

        Uses three single-trip clusters. One call is expected to be truthy
        (end of cluster 1 against the starts of clusters 0 and 2) and two
        are expected to be falsy (end of cluster 1 against start and end of
        cluster 0; end of cluster 2 against the starts of clusters 0 and 1).
        """
        now = time.time()
        trip_a = etatc._createTripEntry(self, now, now, [1, 2], [3, 4])
        trip_b = etatc._createTripEntry(self, now, now, [3, 4], [1, 2])
        trip_c = etatc._createTripEntry(self, now, now, [1, 2], [9, 10])

        repy = rep.representatives([trip_a, trip_b, trip_c], [0, 1, 2])
        repy.list_clusters()
        repy.get_reps()

        accepted = repy.match('end', 1, [('start', 0), ('start', 2)])
        self.assertTrue(accepted)

        rejected_mixed = repy.match('end', 1, [('start', 0), ('end', 0)])
        self.assertTrue(not rejected_mixed)

        rejected_far = repy.match('end', 2, [('start', 0), ('start', 1)])
        self.assertTrue(not rejected_far)
Пример #9
0
 def testElbowDistance(self):
     """Check that ``delete_bins`` sets ``num`` to 2 for a known bin layout.

     Eleven references to one trip serve as the data; ``bins`` is then
     overwritten by hand with bins of sizes 4, 3, 1, 1, 1, 1 before
     ``delete_bins`` is called.
     """
     now = time.time()
     trip = etatc._createTripEntry(self, now, now, [-122, 47], [-123, 47])
     sim = similarity.similarity([trip] * 11, 300)
     # Bypass bin_data and install the bins directly.
     sim.bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]
     sim.delete_bins()
     self.assertTrue(sim.num == 2)
Пример #10
0
 def testElbowDistance(self):
     """Verify ``similarity.delete_bins`` on hand-assigned bins.

     The bins are set directly on the instance (two multi-element bins
     followed by four singletons); after ``delete_bins`` the test expects
     ``sim.num`` to equal 2.
     """
     start, end = [-122, 47], [-123, 47]
     now = time.time()
     single_trip = etatc._createTripEntry(self, now, now, start, end)
     # The same entry object is repeated eleven times.
     repeated = [single_trip] * 11
     preset_bins = [[1, 2, 3, 4], [5, 6, 10], [7], [8], [9], [0]]

     sim = similarity.similarity(repeated, 300)
     sim.bins = preset_bins
     sim.delete_bins()
     self.assertTrue(sim.num == 2)
 def testReps(self):
     """Check the representatives built by ``get_reps``.

     On the fixture data, asserts that ``reps`` and ``clusters`` have the
     same length. Then places three trips in a single cluster (label 0)
     and asserts the representative starts at (5, 6) and ends at (4, 6),
     which are the component-wise means of the three trips' start and end
     coordinates.
     """
     repy = rep.representatives(self.data, self.labels)
     repy.list_clusters()
     repy.get_reps()
     self.assertTrue(len(repy.reps) == len(repy.clusters))

     now = time.time()
     tripa = etatc._createTripEntry(self, now, now, [1, 2], [3, 4])
     tripb = etatc._createTripEntry(self, now, now, [9, 10], [5, 8])
     tripc = etatc._createTripEntry(self, now, now, [5, 6], [4, 6])
     data = [tripa, tripb, tripc]
     labels = [0, 0, 0]
     repy = rep.representatives(data, labels)
     repy.list_clusters()
     repy.get_reps()

     representative = repy.reps[0].data
     # Pass the value as a logging argument so it is only formatted when
     # debug logging is enabled.
     logging.debug("repy.reps[0].data.start_loc = %s",
                   representative.start_loc)
     self.assertEqual(representative.start_loc.coordinates[0], 5)
     self.assertEqual(representative.start_loc.coordinates[1], 6)
     self.assertEqual(representative.end_loc.coordinates[0], 4)
     self.assertEqual(representative.end_loc.coordinates[1], 6)
Пример #12
0
    def testInit(self):
        """Check ``similarity`` construction with bad, empty and real input.

        A non-numeric second argument may raise ``ValueError``; any other
        exception type fails the test. An empty data list must yield empty
        ``data``. Finally, an instance built from ``[t1, t2]`` (``t1`` starts
        and ends at the same point) must expose the same ``data`` as one
        built from ``[t2]`` alone.
        """
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass  # the accepted failure mode
        except Exception as exc:
            self.fail("similarity([], 'b') raised %r instead of ValueError"
                      % (exc,))
        # NOTE(review): a call that raises nothing also passes here; switch
        # to assertRaises if ValueError is meant to be mandatory.

        logging.debug("STARTING init test")
        sim = similarity.similarity([], 100)
        self.assertEqual(len(sim.data), 0)

        now = time.time()
        start = [-122, 47]
        end = [-123, 47]
        t1 = etatc._createTripEntry(self, now, now, start, start)
        t2 = etatc._createTripEntry(self, now, now, start, end)
        sim = similarity.similarity([t1, t2], 100)
        # Lazy logging arguments: formatted only if debug output is enabled.
        logging.debug("sim.data = %s", sim.data)
        simmy = similarity.similarity([t2], 100)
        logging.debug("simmy.data = %s", simmy.data)
        self.assertEqual(sim.data, simmy.data)
Пример #13
0
 def testReps(self):
     """Verify ``representatives.get_reps`` on fixture and synthetic data.

     For the fixture, the number of representatives must equal the number
     of clusters. For three trips sharing label 0, the single
     representative is expected to have start coordinates (5, 6) and end
     coordinates (4, 6) -- the averages of the three inputs.
     """
     repy = rep.representatives(self.data, self.labels)
     repy.list_clusters()
     repy.get_reps()
     self.assertTrue(len(repy.reps) == len(repy.clusters))

     now = time.time()
     endpoints = [([1, 2], [3, 4]), ([9, 10], [5, 8]), ([5, 6], [4, 6])]
     data = [
         etatc._createTripEntry(self, now, now, start, end)
         for start, end in endpoints
     ]
     labels = [0, 0, 0]
     repy = rep.representatives(data, labels)
     repy.list_clusters()
     repy.get_reps()

     # Deferred formatting: the logging module interpolates the argument
     # only when the debug level is enabled.
     logging.debug("repy.reps[0].data.start_loc = %s",
                   repy.reps[0].data.start_loc)
     start_coords = repy.reps[0].data.start_loc.coordinates
     self.assertEqual(start_coords[0], 5)
     self.assertEqual(start_coords[1], 6)
     end_coords = repy.reps[0].data.end_loc.coordinates
     self.assertEqual(end_coords[0], 4)
     self.assertEqual(end_coords[1], 6)
Пример #14
0
    def testInit(self):
        """Verify how ``similarity`` handles its constructor arguments.

        Passing ``'b'`` as the second argument is allowed to raise
        ``ValueError`` and nothing else. An empty list must produce empty
        ``data``. Building from ``[t1, t2]``, where ``t1`` has identical
        start and end points, must give the same ``data`` as building from
        ``[t2]`` only.
        """
        try:
            similarity.similarity([], 'b')
        except ValueError:
            pass  # expected for a non-numeric second argument
        except Exception as exc:
            self.fail("expected ValueError from similarity([], 'b'), got %r"
                      % (exc,))
        # NOTE(review): the original test also tolerates no exception being
        # raised; that leniency is kept. Use assertRaises to tighten it.

        logging.debug("STARTING init test")
        empty_sim = similarity.similarity([], 100)
        self.assertEqual(len(empty_sim.data), 0)

        now = time.time()
        start = [-122, 47]
        end = [-123, 47]
        t1 = etatc._createTripEntry(self, now, now, start, start)
        t2 = etatc._createTripEntry(self, now, now, start, end)

        sim = similarity.similarity([t1, t2], 100)
        # Values are handed to logging unformatted so the string is only
        # built when debug output is actually emitted.
        logging.debug("sim.data = %s", sim.data)
        simmy = similarity.similarity([t2], 100)
        logging.debug("simmy.data = %s", simmy.data)
        self.assertEqual(sim.data, simmy.data)
Пример #15
0
    def testCluster(self):
        """Exercise ``featurization.cluster`` across argument combinations.

        On the fixture data, checks after each of the first two calls that
        there is one label per point and that ``clusters`` equals the number
        of distinct labels; that an unknown method name returns the same
        result as ``'kmeans'``; that ``min_clusters=0`` returns the same
        result as ``min_clusters=2``; and that an inverted range raises
        either ``ValueError`` or nothing. On twenty synthetic trips over two
        distinct start/end pairs, default clustering must give two labels.
        """
        feat = featurization.featurization(self.data)

        feat.cluster(min_clusters=2, max_clusters=10)
        self.assertTrue(len(feat.labels) == len(feat.points))
        self.assertTrue(feat.clusters == len(set(feat.labels)))

        kmeans_result = feat.cluster(
            name='kmeans', min_clusters=5, max_clusters=20)
        self.assertTrue(len(feat.labels) == len(feat.points))
        self.assertTrue(feat.clusters == len(set(feat.labels)))

        # An invalid clustering method defaults to kmeans.
        fallback_result = feat.cluster(
            name='nonname', min_clusters=5, max_clusters=20)
        self.assertTrue(kmeans_result == fallback_result)

        # The result of this call is not inspected.
        feat.cluster(min_clusters=len(self.data) + 1)

        from_zero = feat.cluster(min_clusters=0, max_clusters=20)
        from_two = feat.cluster(min_clusters=2, max_clusters=20)
        self.assertTrue(from_zero == from_two)

        # NOTE(review): a call that raises nothing also passes this check.
        try:
            feat.cluster(min_clusters=10, max_clusters=2)
        except ValueError:
            pass
        except Exception:
            self.assertTrue(False)

        now = time.time()
        first_start, first_end = [-122, 47], [-123, 47]
        second_start, second_end = [-74, 41], [-74, 42]
        trips = [
            etatc._createTripEntry(self, now, now, first_start, first_end)
            for _ in range(10)
        ]
        trips += [
            etatc._createTripEntry(self, now, now, second_start, second_end)
            for _ in range(10)
        ]
        feat = featurization.featurization(trips)
        feat.cluster()
        self.assertTrue(len(set(feat.labels)) == 2)
    def testCluster(self):
        """Verify ``featurization.cluster`` results and argument handling.

        Covers: label/point count agreement and ``clusters`` bookkeeping for
        the default and the explicit ``'kmeans'`` method, equality of the
        results for ``'kmeans'`` and an unrecognised name, equality of the
        results for ``min_clusters`` of 0 and 2, tolerance of ``ValueError``
        for ``min_clusters > max_clusters``, and a two-label outcome for two
        groups of ten identical trips.
        """
        feat = featurization.featurization(self.data)

        def check_label_bookkeeping():
            # One label per point; ``clusters`` counts the distinct labels.
            self.assertTrue(len(feat.labels) == len(feat.points))
            self.assertTrue(feat.clusters == len(set(feat.labels)))

        feat.cluster(min_clusters=2, max_clusters=10)
        check_label_bookkeeping()
        with_kmeans = feat.cluster(name='kmeans', min_clusters=5,
                                   max_clusters=20)
        check_label_bookkeeping()

        with_bad_name = feat.cluster(name='nonname', min_clusters=5,
                                     max_clusters=20)
        # defaults to kmeans with invalid clustering method
        self.assertTrue(with_kmeans == with_bad_name)

        # Called for effect only; the return value is discarded.
        feat.cluster(min_clusters=len(self.data) + 1)

        low_min = feat.cluster(min_clusters=0, max_clusters=20)
        valid_min = feat.cluster(min_clusters=2, max_clusters=20)
        self.assertTrue(low_min == valid_min)

        # NOTE(review): if no exception is raised the test still passes.
        try:
            feat.cluster(min_clusters=10, max_clusters=2)
        except ValueError:
            pass
        except Exception:
            self.assertTrue(False)

        now = time.time()
        data = []
        for start, end in (([-122, 47], [-123, 47]), ([-74, 41], [-74, 42])):
            for _ in range(10):
                entry = etatc._createTripEntry(self, now, now, start, end)
                data.append(entry)
        feat = featurization.featurization(data)
        feat.cluster()
        self.assertTrue(len(set(feat.labels)) == 2)
 def testCalculatePoints(self):
     """Check how ``featurization`` derives ``points`` from its input.

     Empty and ``None`` inputs must leave ``data`` falsy. A trip built from
     all-``None`` arguments may raise ``AttributeError`` (any other
     exception type fails the test). For the fixture data there must be one
     point per data item, and no point may contain ``None``.
     """
     for empty_input in ([], None):
         feat = featurization.featurization(empty_input)
         self.assertTrue(not feat.data)

     blank_trip = etatc._createTripEntry(self, None, None, None, None)
     # NOTE(review): construction succeeding without error also passes.
     try:
         feat = featurization.featurization([blank_trip])
     except AttributeError:
         pass
     except Exception:
         self.assertTrue(False)

     feat = featurization.featurization(self.data)
     self.assertTrue(len(feat.points) == len(feat.data))
     for point in feat.points:
         self.assertTrue(None not in point)
Пример #18
0
 def testCalculatePoints(self):
     """Verify ``featurization`` construction on degenerate and real input.

     ``[]`` and ``None`` must both give falsy ``data``; a trip whose fields
     are all ``None`` is allowed to raise ``AttributeError`` only; and the
     fixture data must yield as many points as data items, none of which
     contains ``None``.
     """
     no_trips = featurization.featurization([])
     self.assertTrue(not no_trips.data)
     none_input = featurization.featurization(None)
     self.assertTrue(not none_input.data)

     degenerate = [etatc._createTripEntry(self, None, None, None, None)]
     # NOTE(review): the test passes as well when nothing is raised here.
     try:
         featurization.featurization(degenerate)
     except AttributeError:
         pass
     except Exception:
         self.assertTrue(False)

     feat = featurization.featurization(self.data)
     point_count = len(feat.points)
     self.assertTrue(point_count == len(feat.data))
     for coords in feat.points:
         self.assertTrue(None not in coords)