Exemplo n.º 1
0
    def test_multiple(self):
        """Batched updates collapse queued duplicates into distinct grid rows."""
        self._add([
            (1.0, 2.0, self.yesterday),
            (-10.0, 40.0, self.yesterday),
        ])
        self._queue([
            (1.0, 2.0), (1.0, 2.0),
            (40.0011, 3.0011), (40.0012, 3.0012), (40.0013, 3.0013),
            (0.0, 0.0),
            (1.0, 2.0),
            (1.00001, 2.00001),
        ])
        for shard_id in DataMap.shards():
            update_datamap.delay(batch=2, shard_id=shard_id).get()

        rows = [
            row
            for shard in DataMap.shards().values()
            for row in self.session.query(shard).all()
        ]

        self.assertEqual(len(rows), 4)
        created = {row.created for row in rows}
        modified = {row.modified for row in rows}
        positions = {
            (row.grid[0] / 1000.0, row.grid[1] / 1000.0) for row in rows
        }

        self.assertEqual(created, set([self.today, self.yesterday]))
        self.assertEqual(modified, set([self.today, self.yesterday]))
        self.assertEqual(
            positions,
            set([(1.0, 2.0), (-10.0, 40.0), (0.0, 0.0), (40.001, 3.001)]))
Exemplo n.º 2
0
    def test_files(self, session):
        """End-to-end pipeline: seed rows, export CSVs per shard, build and
        merge quadtrees, render tiles, then verify counts and coordinates."""
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row["time"], modified=row["time"]
            )
            session.add(data)
        session.flush()

        lines = []
        rows = 0  # reused: counts exported CSV rows from here on

        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DataMap.shards().items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath, shard.__tablename__, _session=session)

                # Empty shards must not leave a file behind.
                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert sorted(os.listdir(tiles)) == ["0", "1", "2"]
            # FIX: the expected high-DPI tile name was mangled by an
            # email-masking scraper ("*****@*****.**"); the real
            # artifact is the retina tile "[email protected]".
            assert sorted(os.listdir(os.path.join(tiles, "0", "0"))) == [
                "0.png",
                "[email protected]",
            ]

        assert rows == 18
        assert len(lines) == 18
        lats = [round(float(line[0]), 2) for line in lines]
        longs = [round(float(line[1]), 2) for line in lines]
        assert set(lats) == set([-10.0, 0.0, 12.35])
        assert set(longs) == set([-11.0, 12.35])
Exemplo n.º 3
0
    def test_files(self, db, session):  # pragma: no cover
        """End-to-end pipeline: seed rows, export CSVs per shard, build and
        merge quadtrees, render tiles, then verify counts and coordinates."""
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon),
                                                 created=row['time'],
                                                 modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0  # reused: counts exported CSV rows from here on
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath,
                                     shard.__tablename__,
                                     _session=session)

                # Empty shards must not leave a file behind.
                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            # FIX: high-DPI tile name was mangled by an email-masking
            # scraper; the expected artifact is the retina tile '[email protected]'.
            assert (sorted(os.listdir(os.path.join(
                tiles, '0', '0'))) == ['0.png', '[email protected]'])

        assert rows == 18
        assert len(lines) == 18
        assert (set([round(float(l[0]), 2)
                     for l in lines]) == set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2)
                     for l in lines]) == set([-11.0, 12.35]))
Exemplo n.º 4
0
    def test_files(self, db_rw, session):
        """End-to-end pipeline against a read-write DB: export, quadtree,
        merge, render tiles, then verify row counts and coordinates."""
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row['time'], modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0  # reused: counts exported CSV rows from here on
        db_url = str(db_rw.engine.url)
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(
                    db_url, filepath, shard.__tablename__,
                    _session=session)

                # Empty shards must not leave a file behind.
                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            # FIX: high-DPI tile name was mangled by an email-masking
            # scraper; the expected artifact is the retina tile '[email protected]'.
            assert (sorted(os.listdir(os.path.join(tiles, '0', '0'))) ==
                    ['0.png', '[email protected]'])

        assert rows == 36
        assert len(lines) == 36
        assert (set([round(float(l[0]), 2) for l in lines]) ==
                set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2) for l in lines]) ==
                set([-11.0, 12.35]))
Exemplo n.º 5
0
 def _add(self, entries):
     """Insert one DataMap row per (lat, lon, time) entry and flush."""
     for raw_lat, raw_lon, when in entries:
         scaled_lat, scaled_lon = DataMap.scale(raw_lat, raw_lon)
         model = DataMap.shard_model(scaled_lat, scaled_lon)
         self.session.add(model(
             grid=(scaled_lat, scaled_lon),
             created=when,
             modified=when))
     self.session.flush()
Exemplo n.º 6
0
    def _queue(self, pairs):
        """Scale each (lat, lon) pair and enqueue its encoded grid per shard."""
        grids = defaultdict(list)
        for raw_lat, raw_lon in pairs:
            scaled = DataMap.scale(raw_lat, raw_lon)
            grids[DataMap.shard_id(*scaled)].append(
                encode_datamap_grid(*scaled))

        for shard_id, values in grids.items():
            data_queue = self.celery_app.data_queues['update_datamap_' + shard_id]
            data_queue.enqueue(list(values))
Exemplo n.º 7
0
    def _queue(self, pairs):
        """Scale each (lat, lon) pair and enqueue its encoded grid per
        shard, using the raw (non-JSON) queue payload format."""
        grids = defaultdict(list)
        for raw_lat, raw_lon in pairs:
            scaled = DataMap.scale(raw_lat, raw_lon)
            grids[DataMap.shard_id(*scaled)].append(
                encode_datamap_grid(*scaled))

        for shard_id, values in grids.items():
            data_queue = self.celery_app.data_queues['update_datamap_' + shard_id]
            data_queue.enqueue(list(values), json=False)
Exemplo n.º 8
0
    def test_one(self):
        """A single queued position creates one row in the right shard."""
        lat, lon = 1.234567, 2.345678
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._queue([(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = self.session.query(DataMap.shards()[shard_id]).all()
        self.assertEqual(1, len(grids))
        row = grids[0]
        self._check_position(row, 1.235, 2.346)
        self.assertEqual(self.today, row.created)
        self.assertEqual(self.today, row.modified)
Exemplo n.º 9
0
    def test_one(self, celery, session):
        """A single queued position creates one row in the right shard."""
        lat, lon = 1.234567, 2.345678
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._queue(celery, [(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = session.query(DataMap.shards()[shard_id]).all()
        assert 1 == len(grids)
        row = grids[0]
        self._check_position(row, 1.235, 2.346)
        assert row.created == self.today
        assert row.modified == self.today
Exemplo n.º 10
0
    def test_one(self):
        """A single queued position creates one row in the right shard."""
        lat, lon = 1.234567, 2.345678
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._queue([(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = self.session.query(DataMap.shards()[shard_id]).all()
        self.assertEqual(1, len(grids))
        row = grids[0]
        self._check_position(row, 1.235, 2.346)
        self.assertEqual(self.today, row.created)
        self.assertEqual(self.today, row.modified)
Exemplo n.º 11
0
    def test_one(self, celery, session):
        """A single queued position creates one row in the right shard."""
        lat, lon = 1.234567, 2.345678
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._queue(celery, [(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = session.query(DataMap.shards()[shard_id]).all()
        assert 1 == len(grids)
        row = grids[0]
        self._check_position(row, 1.235, 2.346)
        assert row.created == self.today
        assert row.modified == self.today
Exemplo n.º 12
0
    def test_files(self):
        """End-to-end pipeline: seed rows, export CSVs per shard, build and
        merge quadtrees, render tiles, then verify counts and coordinates."""
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon), created=row["time"], modified=row["time"])
            self.session.add(data)
        self.session.flush()

        lines = []
        rows = 0  # reused: counts exported CSV rows from here on
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DATAMAP_SHARDS.items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(None, filepath, shard.__tablename__, _db_rw=_make_db(), _session=self.session)

                # Empty shards must not leave a file behind.
                if not result:
                    self.assertFalse(os.path.isfile(filepath))
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                self.assertTrue(os.path.isdir(quadfolder))
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            self.assertEqual(sorted(os.listdir(tiles)), ["0", "1", "2"])
            # FIX: high-DPI tile name was mangled by an email-masking
            # scraper; the expected artifact is the retina tile "[email protected]".
            self.assertEqual(sorted(os.listdir(os.path.join(tiles, "0", "0"))), ["0.png", "[email protected]"])

        self.assertEqual(rows, 36)
        self.assertEqual(len(lines), 36)
        self.assertEqual(set([round(float(l[0]), 2) for l in lines]), set([-10.0, 0.0, 12.35]))
        self.assertEqual(set([round(float(l[1]), 2) for l in lines]), set([-11.0, 12.35]))
Exemplo n.º 13
0
    def test_update(self):
        """Re-reporting an existing grid keeps created, bumps modified."""
        lat, lon = 1.0, 2.0
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._add([(lat, lon, self.yesterday)])
        self._queue([(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = self.session.query(DataMap.shards()[shard_id]).all()
        self.assertEqual(1, len(grids))
        row = grids[0]
        self._check_position(row, 1.0, 2.0)
        self.assertEqual(self.yesterday, row.created)
        self.assertEqual(self.today, row.modified)
Exemplo n.º 14
0
    def test_update(self, celery, session):
        """Re-reporting an existing grid keeps created, bumps modified."""
        lat, lon = 1.0, 2.0
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._add(session, [(lat, lon, self.yesterday)])
        self._queue(celery, [(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = session.query(DataMap.shards()[shard_id]).all()
        assert 1 == len(grids)
        row = grids[0]
        self._check_position(row, 1.0, 2.0)
        assert row.created == self.yesterday
        assert row.modified == self.today
Exemplo n.º 15
0
    def test_update(self, celery, session):
        """Re-reporting an existing grid keeps created, bumps modified."""
        lat, lon = 1.0, 2.0
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._add(session, [(lat, lon, self.yesterday)])
        self._queue(celery, [(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = session.query(DataMap.shards()[shard_id]).all()
        assert 1 == len(grids)
        row = grids[0]
        self._check_position(row, 1.0, 2.0)
        assert row.created == self.yesterday
        assert row.modified == self.today
Exemplo n.º 16
0
    def test_update(self):
        """Re-reporting an existing grid keeps created, bumps modified."""
        lat, lon = 1.0, 2.0
        shard_id = DataMap.shard_id(*DataMap.scale(lat, lon))
        self._add([(lat, lon, self.yesterday)])
        self._queue([(lat, lon)])
        update_datamap.delay(shard_id=shard_id).get()

        grids = self.session.query(DataMap.shards()[shard_id]).all()
        self.assertEqual(1, len(grids))
        row = grids[0]
        self._check_position(row, 1.0, 2.0)
        self.assertEqual(self.yesterday, row.created)
        self.assertEqual(self.today, row.modified)
Exemplo n.º 17
0
 def test_shard_id(self):
     """Quadrant selection: the lat cut is at 36000, the lon cut at 5000."""
     cases = [
         ((None, None), None),
         ((85000, 180000), 'ne'),
         ((36000, 5000), 'ne'),
         ((35999, 5000), 'se'),
         ((-85000, 180000), 'se'),
         ((85000, -180000), 'nw'),
         ((36000, 4999), 'nw'),
         ((35999, 4999), 'sw'),
         ((-85000, -180000), 'sw'),
     ]
     for args, expected in cases:
         self.assertEqual(DataMap.shard_id(*args), expected)
Exemplo n.º 18
0
 def test_shard_id(self):
     """Quadrant selection: the lat cut is at 36000, the lon cut at 5000."""
     cases = [
         ((None, None), None),
         ((85000, 180000), "ne"),
         ((36000, 5000), "ne"),
         ((35999, 5000), "se"),
         ((-85000, 180000), "se"),
         ((85000, -180000), "nw"),
         ((36000, 4999), "nw"),
         ((35999, 4999), "sw"),
         ((-85000, -180000), "sw"),
     ]
     for args, expected in cases:
         assert DataMap.shard_id(*args) == expected
Exemplo n.º 19
0
 def test_shard_id(self):
     """Quadrant selection: the lat cut is at 36000, the lon cut at 5000."""
     cases = [
         ((None, None), None),
         ((85000, 180000), 'ne'),
         ((36000, 5000), 'ne'),
         ((35999, 5000), 'se'),
         ((-85000, 180000), 'se'),
         ((85000, -180000), 'nw'),
         ((36000, 4999), 'nw'),
         ((35999, 4999), 'sw'),
         ((-85000, -180000), 'sw'),
     ]
     for args, expected in cases:
         assert DataMap.shard_id(*args) == expected
Exemplo n.º 20
0
 def test_grid_bytes(self):
     """A grid stored as encoded bytes round-trips to a (lat, lon) tuple."""
     lat, lon = 12000, 34000
     encoded = encode_datamap_grid(lat, lon)
     model = DataMap.shard_model(lat, lon)
     self.session.add(model(grid=encoded))
     self.session.flush()
     fetched = self.session.query(model).first()
     self.assertEqual(fetched.grid, (lat, lon))
Exemplo n.º 21
0
    def test_multiple(self, celery, session):
        """Queued duplicates collapse into one row per distinct grid cell."""
        self._add(
            session,
            [
                (0.0, 1.0, self.today),
                (1.0, 2.0, self.yesterday),
                (-10.0, 40.0, self.yesterday),
            ],
        )
        self._queue(
            celery,
            [
                (0.0, 1.0),
                (1.0, 2.0),
                (1.0, 2.0),
                (40.0011, 3.0011),
                (40.0012, 3.0012),
                (40.0013, 3.0013),
                (0.0, 0.0),
                (1.0, 2.0),
                (1.00001, 2.00001),
            ],
        )
        for shard_id in DataMap.shards():
            update_datamap.delay(shard_id=shard_id).get()

        rows = [
            row
            for shard in DataMap.shards().values()
            for row in session.query(shard).all()
        ]

        assert len(rows) == 5
        created = {row.created for row in rows}
        modified = {row.modified for row in rows}
        positions = {
            (row.grid[0] / 1000.0, row.grid[1] / 1000.0) for row in rows
        }

        assert created == set([self.today, self.yesterday])
        assert modified == set([self.today, self.yesterday])
        assert positions == set([(0.0, 0.0), (0.0, 1.0), (1.0, 2.0),
                                 (-10.0, 40.0), (40.001, 3.001)])
Exemplo n.º 22
0
 def test_fields(self):
     """All mapped columns round-trip through the database unchanged."""
     today = util.utcnow().date()
     lat, lon = 12345, -23456
     model = DataMap.shard_model(lat, lon)
     self.session.add(model(grid=(lat, lon), created=today, modified=today))
     self.session.flush()
     row = self.session.query(model).first()
     self.assertEqual(row.grid, (lat, lon))
     self.assertEqual(row.created, today)
     self.assertEqual(row.modified, today)
Exemplo n.º 23
0
    def test_files(self, temp_dir, mock_db_worker_session):
        """Pipeline test: per-shard CSV export, quadtree build/merge,
        tile rendering, then verification of counts and coordinates."""
        lines = []
        rows = 0

        csvdir = os.path.join(temp_dir, "csv")
        os.mkdir(csvdir)
        quaddir = os.path.join(temp_dir, "quadtrees")
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, "shapes")
        tiles = os.path.join(temp_dir, "tiles")

        # Expected (row_count, file_count) per shard for the fixture data.
        expected = {"ne": (0, 0), "nw": (0, 0), "se": (12, 1), "sw": (6, 1)}
        for shard_id, shard in DataMap.shards().items():
            filename = f"map_{shard_id}.csv"
            filepath = os.path.join(csvdir, filename)
            row_count, file_count = export_to_csv(filename, csvdir,
                                                  shard.__tablename__)
            assert row_count == expected[shard_id][0]
            assert file_count == expected[shard_id][1]

            # Empty shards must not leave a CSV behind.
            if not row_count:
                assert not os.path.isfile(filepath)
                continue

            rows += row_count
            with open(filepath, "r") as fd:
                written = fd.read()
            lines.extend([line.split(",") for line in written.split()])

            csv_to_quadtree(filename, csvdir, quaddir)

            quadfolder = os.path.join(quaddir, "map_" + shard_id)
            assert os.path.isdir(quadfolder)
            self._check_quadtree(quadfolder)

        assert rows
        merge_quadtrees(quaddir, shapes)
        self._check_quadtree(shapes)

        with Pool() as pool:
            render_tiles(pool, shapes, tiles, max_zoom=2)
        assert sorted(os.listdir(tiles)) == ["0", "1", "2"]
        # FIX: the expected high-DPI tile name was mangled by an
        # email-masking scraper ("*****@*****.**"); the real artifact
        # is the retina tile "[email protected]".
        assert sorted(os.listdir(os.path.join(tiles, "0", "0"))) == [
            "0.png",
            "[email protected]",
        ]

        assert rows == 18
        assert len(lines) == 18
        lats = [round(float(line[0]), 2) for line in lines]
        longs = [round(float(line[1]), 2) for line in lines]
        assert set(lats) == set([-10.0, 0.0, 12.35])
        assert set(longs) == set([-11.0, 12.35])
Exemplo n.º 24
0
def export_files(pool, db_url, csvdir):  # pragma: no cover
    """Export every DataMap shard to a gzipped CSV via the worker pool.

    Shards are dispatched in sorted order, which prefers the north —
    it contains more data points than the south.

    :param pool: A multiprocessing pool.
    :param db_url: Database URL passed through to ``export_file``.
    :param csvdir: Directory receiving the ``map_<shard>.csv.gz`` files.
    :return: Total number of exported rows across all shards.
    """
    jobs = [
        pool.apply_async(
            export_file,
            (db_url,
             os.path.join(csvdir, 'map_%s.csv.gz' % shard_id),
             shard.__tablename__))
        for shard_id, shard in sorted(DataMap.shards().items())
    ]
    return sum(job.get() for job in jobs)
Exemplo n.º 25
0
    def test_multiple(self):
        """Batched updates collapse queued duplicates into distinct grid rows."""
        self._add([
            (1.0, 2.0, self.yesterday),
            (-10.0, 40.0, self.yesterday),
        ])
        self._queue([
            (1.0, 2.0),
            (1.0, 2.0),
            (40.0011, 3.0011),
            (40.0012, 3.0012),
            (40.0013, 3.0013),
            (0.0, 0.0),
            (1.0, 2.0),
            (1.00001, 2.00001),
        ])
        for shard_id in DataMap.shards():
            update_datamap.delay(batch=2, shard_id=shard_id).get()

        rows = [
            row
            for shard in DataMap.shards().values()
            for row in self.session.query(shard).all()
        ]

        self.assertEqual(len(rows), 4)
        created = {row.created for row in rows}
        modified = {row.modified for row in rows}
        positions = {
            (row.grid[0] / 1000.0, row.grid[1] / 1000.0) for row in rows
        }

        self.assertEqual(created, set([self.today, self.yesterday]))
        self.assertEqual(modified, set([self.today, self.yesterday]))
        self.assertEqual(
            positions,
            set([(1.0, 2.0), (-10.0, 40.0), (0.0, 0.0), (40.001, 3.001)]))
Exemplo n.º 26
0
    def test_multiple(self, celery, session):
        """Queued duplicates collapse into one row per distinct grid cell."""
        self._add(session, [
            (0.0, 1.0, self.today),
            (1.0, 2.0, self.yesterday),
            (-10.0, 40.0, self.yesterday),
        ])
        self._queue(celery, [
            (0.0, 1.0),
            (1.0, 2.0), (1.0, 2.0),
            (40.0011, 3.0011), (40.0012, 3.0012), (40.0013, 3.0013),
            (0.0, 0.0),
            (1.0, 2.0),
            (1.00001, 2.00001),
        ])
        for shard_id in DataMap.shards():
            update_datamap.delay(shard_id=shard_id).get()

        rows = [
            row
            for shard in DataMap.shards().values()
            for row in session.query(shard).all()
        ]

        assert len(rows) == 5
        created = {row.created for row in rows}
        modified = {row.modified for row in rows}
        positions = {
            (row.grid[0] / 1000.0, row.grid[1] / 1000.0) for row in rows
        }

        assert created == set([self.today, self.yesterday])
        assert modified == set([self.today, self.yesterday])
        assert (positions == set([
            (0.0, 0.0), (0.0, 1.0), (1.0, 2.0),
            (-10.0, 40.0), (40.001, 3.001)]))
Exemplo n.º 27
0
    def test_cleanup(self, celery, session):
        """After cleanup each shard retains exactly one row; the rows dated
        366 days back are the ones expected to be purged."""
        stale = self.today - timedelta(days=366)
        session.add_all([
            self._one(37.0, 6.0, self.today),
            self._one(37.0, 6.1, stale),
            self._one(37.0, 4.0, self.today),
            self._one(37.0, 4.1, stale),
            self._one(10.0, 6.0, self.today),
            self._one(10.0, 6.1, stale),
            self._one(10.0, 4.0, self.today),
            self._one(10.0, 4.1, stale),
        ])
        session.flush()

        for shard_id, shard in DataMap.shards().items():
            cleanup_datamap.delay(shard_id=shard_id).get()
            assert 1 == session.query(shard).count()
Exemplo n.º 28
0
    def test_cleanup(self, celery, session):
        """After cleanup each shard retains exactly one row; the rows dated
        366 days back are the ones expected to be purged."""
        stale = self.today - timedelta(days=366)
        session.add_all([
            self._one(37.0, 6.0, self.today),
            self._one(37.0, 6.1, stale),
            self._one(37.0, 4.0, self.today),
            self._one(37.0, 4.1, stale),
            self._one(10.0, 6.0, self.today),
            self._one(10.0, 6.1, stale),
            self._one(10.0, 4.0, self.today),
            self._one(10.0, 4.1, stale),
        ])
        session.flush()

        for shard_id, shard in DataMap.shards().items():
            cleanup_datamap.delay(shard_id=shard_id).get()
            assert 1 == session.query(shard).count()
Exemplo n.º 29
0
def export_to_csvs(pool, csv_dir):
    """
    Export the datamap database tables to CSV files.

    Small tables produce a single CSV, such as "map_ne.csv" for the
    datamap_ne (northeast) table.  Large tables are split across
    several files, such as "submap_ne_0001.csv".

    :param pool: A multiprocessing pool
    :param csv_dir: The directory to write CSV output files
    :return: A tuple of counts (rows, CSVs)
    """
    result_rows = 0
    result_csvs = 0

    # Dispatch in sorted shard order: this prefers the north, which
    # contains more data points than the south.
    jobs = [
        pool.apply_async(
            export_to_csv, (f"map_{shard_id}.csv", csv_dir, shard.__tablename__)
        )
        for shard_id, shard in sorted(DataMap.shards().items())
    ]

    def on_success(result):
        # Fold each job's (rows, csvs) counts into the running totals.
        nonlocal result_rows, result_csvs
        rows, csvs = result
        result_rows += rows
        result_csvs += csvs

    def on_progress(tables_complete, table_percent):
        nonlocal result_rows
        LOG.debug(
            f"  Exported {result_rows:,} row{_s(result_rows)}"
            f" from {tables_complete:,} table{_s(tables_complete)}"
            f" to {result_csvs:,} CSV file{_s(result_csvs)}"
            f" ({table_percent:0.1%})"
        )

    watch_jobs(jobs, on_success=on_success, on_progress=on_progress)
    return result_rows, result_csvs
Exemplo n.º 30
0
    def test_multiple_csv(self, temp_dir, raven, mock_db_worker_session):
        """export_to_csv creates multiple CSVs at the file_limit."""
        expected = {"ne": (0, 0), "nw": (0, 0), "se": (12, 2), "sw": (6, 1)}
        csv_dir = os.path.join(temp_dir, "csv")
        os.mkdir(csv_dir)

        for shard_id, shard in DataMap.shards().items():
            filename = f"map_{shard_id}.csv"
            filepath = os.path.join(csv_dir, filename)
            row_count, file_count = export_to_csv(
                filename, csv_dir, shard.__tablename__, file_limit=1
            )
            exp_rows, exp_files = expected[shard_id]
            assert row_count == exp_rows
            assert file_count == exp_files

            if not row_count:
                # Empty shards must not create a file at all.
                assert not os.path.isfile(filepath)
            elif file_count == 1:
                assert os.path.isfile(filepath)
            else:
                # Split exports use numbered submap files instead of
                # the single map file.
                assert not os.path.isfile(filepath)
                for num in range(1, file_count + 1):
                    part = os.path.join(
                        csv_dir, f"submap_{shard_id}_{num:04}.csv"
                    )
                    assert os.path.isfile(part)

        quad_dir = os.path.join(temp_dir, "quadtrees")
        os.mkdir(quad_dir)
        with Pool() as pool:
            csv_count, intermediate_quad_count, final_quad_count = (
                csv_to_quadtrees(pool, csv_dir, quad_dir)
            )
            assert csv_count == 3
            assert intermediate_quad_count == 2
            assert final_quad_count == 2
Exemplo n.º 31
0
 def _one(self, lat, lon, time):
     """Build a DataMap row for the scaled (lat, lon) grid at *time*."""
     scaled = DataMap.scale(lat, lon)
     model = DataMap.shard_model(*scaled)
     return model(grid=scaled, created=time, modified=time)
Exemplo n.º 32
0
 def _check_position(self, stat, lat, lon):
     """Assert that *stat*'s grid matches the scaled (lat, lon)."""
     expected = DataMap.scale(lat, lon)
     self.assertEqual(stat.grid, expected)
Exemplo n.º 33
0
 def _check_position(self, stat, lat, lon):
     """Assert that *stat*'s grid matches the scaled (lat, lon)."""
     expected = DataMap.scale(lat, lon)
     assert stat.grid == expected
Exemplo n.º 34
0
 def test_empty(self, celery, session):
     """Running the update task with nothing queued writes no rows."""
     for shard_id, shard in DataMap.shards().items():
         update_datamap.delay(shard_id=shard_id).get()
         assert 0 == session.query(shard).count()
Exemplo n.º 35
0
 def test_empty(self, celery, session):
     """Running the update task with nothing queued writes no rows."""
     for shard_id, shard in DataMap.shards().items():
         update_datamap.delay(shard_id=shard_id).get()
         assert 0 == session.query(shard).count()
Exemplo n.º 36
0
 def test_grid_list(self):
     """A grid supplied as a list is rejected when the session flushes."""
     lat, lon = 1000, -2000
     model = DataMap.shard_model(lat, lon)
     self.session.add(model(grid=[lat, lon]))
     with self.assertRaises(Exception):
         self.session.flush()
Exemplo n.º 37
0
 def test_grid_length(self):
     """A grid blob of the wrong byte length is rejected on flush."""
     bad_grid = b'\x00' * 9
     self.session.add(DataMap.shard_model(0, 9)(grid=bad_grid))
     with self.assertRaises(Exception):
         self.session.flush()
Exemplo n.º 38
0
 def test_grid_none(self):
     """A missing (None) grid is rejected when the session flushes."""
     row = DataMap.shard_model(0, 0)(grid=None)
     self.session.add(row)
     with self.assertRaises(Exception):
         self.session.flush()
Exemplo n.º 39
0
 def __init__(self, task, shard_id=None):
     """Bind the task and resolve the shard model for *shard_id*."""
     self.task = task
     self.shard_id = shard_id
     # .get() yields None instead of raising for unknown shard ids.
     self.shard = DataMap.shards().get(shard_id)
Exemplo n.º 40
0
 def test_grid_length(self, session):
     """A grid blob of the wrong byte length is rejected on flush."""
     bad_grid = b'\x00' * 9
     session.add(DataMap.shard_model(0, 9)(grid=bad_grid))
     with pytest.raises(Exception):
         session.flush()
Exemplo n.º 41
0
 def test_grid_none(self, session):
     """A missing (None) grid is rejected when the session flushes."""
     with warnings.catch_warnings():
         # Suppress the SAWarning emitted for the None-valued column.
         warnings.simplefilter('ignore', SAWarning)
         row = DataMap.shard_model(0, 0)(grid=None)
         session.add(row)
         with pytest.raises(Exception):
             session.flush()
Exemplo n.º 42
0
 def test_empty(self):
     """Running the update task with nothing queued writes no rows."""
     for shard_id, shard in DataMap.shards().items():
         update_datamap.delay(shard_id=shard_id).get()
         self.assertEqual(0, self.session.query(shard).count())
Exemplo n.º 43
0
 def _add(self, entries):
     """Insert one DataMap row per (lat, lon, time) entry and flush."""
     for raw_lat, raw_lon, when in entries:
         scaled_lat, scaled_lon = DataMap.scale(raw_lat, raw_lon)
         model = DataMap.shard_model(scaled_lat, scaled_lon)
         self.session.add(model(
             grid=(scaled_lat, scaled_lon), created=when, modified=when))
     self.session.flush()
Exemplo n.º 44
0
 def _check_position(self, stat, lat, lon):
     """Assert that *stat*'s grid matches the scaled (lat, lon)."""
     expected = DataMap.scale(lat, lon)
     self.assertEqual(stat.grid, expected)
Exemplo n.º 45
0
 def __init__(self, task, session, pipe, shard_id=None):
     """Initialize the base data task and resolve the shard model."""
     DataTask.__init__(self, task, session)
     self.pipe = pipe
     self.shard_id = shard_id
     # .get() yields None instead of raising for unknown shard ids.
     self.shard = DataMap.shards().get(shard_id)
Exemplo n.º 46
0
 def test_scale(self):
     """Coordinates scale to rounded integer milli-degrees."""
     scaled = DataMap.scale(-1.12345678, 2.23456789)
     assert scaled == (-1123, 2235)
Exemplo n.º 47
0
 def test_scale(self):
     """Coordinates scale to rounded integer milli-degrees."""
     scaled = DataMap.scale(-1.12345678, 2.23456789)
     self.assertEqual(scaled, (-1123, 2235))
Exemplo n.º 48
0
 def test_empty(self):
     """Running the update task with nothing queued writes no rows."""
     for shard_id, shard in DataMap.shards().items():
         update_datamap.delay(shard_id=shard_id).get()
         self.assertEqual(0, self.session.query(shard).count())