Example #1
    def test_files(self, session):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row["time"], modified=row["time"]
            )
            session.add(data)
        session.flush()

        lines = []
        rows = 0

        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DataMap.shards().items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath, shard.__tablename__, _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert sorted(os.listdir(tiles)) == ["0", "1", "2"]
            assert sorted(os.listdir(os.path.join(tiles, "0", "0"))) == [
                "0.png",
                "*****@*****.**",
            ]

        assert rows == 18
        assert len(lines) == 18
        lats = [round(float(line[0]), 2) for line in lines]
        longs = [round(float(line[1]), 2) for line in lines]
        assert set(lats) == set([-10.0, 0.0, 12.35])
        assert set(longs) == set([-11.0, 12.35])
Example #2
    def __call__(self, hourly=True, _bucket=None):
        if _bucket is None:
            bucket = settings("asset_bucket")
        else:
            bucket = _bucket

        if not bucket:
            return

        now = util.utcnow()
        today = now.date()
        start_time = None
        end_time = None

        if hourly:
            end_time = now.replace(minute=0, second=0)
            file_time = end_time
            file_type = "diff"
            start_time = end_time - timedelta(hours=1)
        else:
            file_time = now.replace(hour=0, minute=0, second=0)
            file_type = "full"

        filename = "MLS-%s-cell-export-" % file_type
        filename = filename + file_time.strftime("%Y-%m-%dT%H0000.csv.gz")

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with self.task.db_session(commit=False) as session:
                write_stations_to_csv(session,
                                      path,
                                      today,
                                      start_time=start_time,
                                      end_time=end_time)
            self.write_stations_to_s3(path, bucket)
Example #3
    def __call__(self, diff=True, _filename=None):
        url = self.settings.get('url')
        apikey = self.settings.get('apikey')
        if not url or not apikey:  # pragma: no cover
            return

        if _filename is None:
            if diff:
                prev_hour = util.utcnow() - timedelta(hours=1)
                _filename = prev_hour.strftime(
                    'cell_towers_diff-%Y%m%d%H.csv.gz')
            else:  # pragma: no cover
                _filename = 'cell_towers.csv.gz'

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, _filename)
            with open(path, 'wb') as temp_file:
                with closing(
                        requests.get(url,
                                     params={
                                         'apiKey': apikey,
                                         'filename': _filename
                                     },
                                     stream=True)) as req:

                    for chunk in req.iter_content(chunk_size=2**20):
                        temp_file.write(chunk)
                        temp_file.flush()

                with self.task.redis_pipeline() as pipe:
                    with self.task.db_session() as session:
                        self.import_stations(session, pipe, path)
Example #4
    def __call__(self, diff=True, _filename=None):
        url = self.settings.get('url')
        apikey = self.settings.get('apikey')
        if not url or not apikey:  # pragma: no cover
            return

        if _filename is None:
            if diff:
                prev_hour = util.utcnow() - timedelta(hours=1)
                _filename = prev_hour.strftime(
                    'cell_towers_diff-%Y%m%d%H.csv.gz')
            else:  # pragma: no cover
                _filename = 'cell_towers.csv.gz'

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, _filename)
            with open(path, 'wb') as temp_file:
                with closing(requests.get(url,
                                          params={'apiKey': apikey,
                                                  'filename': _filename},
                                          stream=True)) as req:

                    for chunk in req.iter_content(chunk_size=2 ** 20):
                        temp_file.write(chunk)
                        temp_file.flush()

                with self.task.redis_pipeline() as pipe:
                    with self.task.db_session() as session:
                        self.import_stations(session, pipe, path)
Example #5
    def __call__(self, hourly=True, _bucket=None):
        if _bucket is None:  # pragma: no cover
            bucket = self.settings['bucket']
        else:
            bucket = _bucket

        if not bucket:  # pragma: no cover
            return

        now = util.utcnow()
        start_time = None
        end_time = None

        if hourly:
            end_time = now.replace(minute=0, second=0)
            file_time = end_time
            file_type = 'diff'
            start_time = end_time - timedelta(hours=1)
        else:
            file_time = now.replace(hour=0, minute=0, second=0)
            file_type = 'full'

        filename = 'MLS-%s-cell-export-' % file_type
        filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with self.task.db_session(commit=False) as session:
                write_stations_to_csv(
                    session, path,
                    start_time=start_time, end_time=end_time)
            self.write_stations_to_s3(path, bucket)
Example #6
    def get_csv(self, lo=1, hi=10, time=1408604686):
        cell = self.cell
        line_template = ('UMTS,{mcc},{mnc},{lac},{cid},{psc},{lon:.7f},'
                         '{lat:.7f},1,1,1,{time},{time},')
        lines = [line_template.format(
            mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=i * 1010, psc='',
            lon=cell.lon + i * 0.002,
            lat=cell.lat + i * 0.001,
            time=time)
            for i in range(lo, hi)]
        # add bad lines
        lines.append(line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc=12,
            lon=cell.lon, lat=cell.lat, time=time,
        ))
        lines.append(line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc='',
            lon=cell.lon, lat=cell.lat, time=time,
        ))
        txt = '\n'.join(lines)

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'import.csv.gz')
            with util.gzip_open(path, 'w') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    gzip_file.write(txt)
            yield path
Example #7
    def __call__(self, hourly=True, _bucket=None):
        if _bucket is None:  # pragma: no cover
            bucket = self.settings['bucket']
        else:
            bucket = _bucket

        if not bucket:  # pragma: no cover
            return

        now = util.utcnow()
        start_time = None
        end_time = None

        if hourly:
            end_time = now.replace(minute=0, second=0)
            file_time = end_time
            file_type = 'diff'
            start_time = end_time - timedelta(hours=1)
        else:
            file_time = now.replace(hour=0, minute=0, second=0)
            file_type = 'full'

        filename = 'MLS-%s-cell-export-' % file_type
        filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, filename)
            with self.task.db_session(commit=False) as session:
                write_stations_to_csv(session,
                                      path,
                                      start_time=start_time,
                                      end_time=end_time)
            self.write_stations_to_s3(path, bucket)
Example #8
    def test_files(self, db_rw, session):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(
                grid=(lat, lon), created=row['time'], modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0
        db_url = str(db_rw.engine.url)
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(
                    db_url, filepath, shard.__tablename__,
                    _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            assert (sorted(os.listdir(os.path.join(tiles, '0', '0'))) ==
                    ['0.png', '0@2x.png'])

        assert rows == 36
        assert len(lines) == 36
        assert (set([round(float(l[0]), 2) for l in lines]) ==
                set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2) for l in lines]) ==
                set([-11.0, 12.35]))
Example #9
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = ("radio", "cid", "lat", "lon", "mnc", "mcc", "lac")
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {
            "radio": Radio.wcdma,
            "mcc": base_cell.mcc,
            "mnc": base_cell.mnc,
            "lac": base_cell.lac,
        }
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat, lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell["lat"] = "%.7f" % cell["lat"]
            cell["lon"] = "%.7f" % cell["lon"]

            cell["radio"] = "UMTS"
            cell_strings = [(field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(
            cid=220,
            created=long_ago,
            modified=long_ago,
            last_seen=long_ago.date(),
            **cell_key,
        )
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, "export.csv.gz")
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, "r") as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = next(reader)
                    assert "area" in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value)
                            for (field, value) in exported_cell.items()
                            if field in cell_fixture_fields
                        ]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #10
    def test_files(self, db, session):  # pragma: no cover
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row['lat'], row['lon'])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon),
                                                 created=row['time'],
                                                 modified=row['time'])
            session.add(data)
        session.flush()

        lines = []
        rows = 0
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, 'quadtrees')
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, 'shapes')
            tiles = os.path.join(temp_dir, 'tiles')

            for shard_id, shard in DataMap.shards().items():
                filename = 'map_%s.csv.gz' % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(filepath,
                                     shard.__tablename__,
                                     _session=session)

                if not result:
                    assert not os.path.isfile(filepath)
                    continue

                rows += result
                with util.gzip_open(filepath, 'r') as fd:
                    written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

                encode_file(filename, temp_dir, quaddir)

                quadfolder = os.path.join(quaddir, 'map_' + shard_id)
                assert os.path.isdir(quadfolder)
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2)
            assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
            assert (sorted(os.listdir(os.path.join(
                tiles, '0', '0'))) == ['0.png', '0@2x.png'])

        assert rows == 18
        assert len(lines) == 18
        assert (set([round(float(l[0]), 2)
                     for l in lines]) == set([-10.0, 0.0, 12.35]))
        assert (set([round(float(l[1]), 2)
                     for l in lines]) == set([-11.0, 12.35]))
Example #11
    def test_local_export(self):
        cell_fixture_fields = ('radio', 'cid', 'lat', 'lon', 'mnc', 'mcc',
                               'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {
            'radio': Radio.wcdma,
            'mcc': base_cell.mcc,
            'mnc': base_cell.mnc,
            'lac': base_cell.lac
        }
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid,
                        lat=base_cell.lat,
                        lon=base_cell.lon,
                        **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [(field, str(value))
                            for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        self.session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(self.session, path)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    self.assertTrue('area' in header.values())

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value)
                            for (field, value) in exported_cell.items()
                            if field in cell_fixture_fields
                        ]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    self.assertEqual(cells, exported_cells)
Example #12
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                         last_seen=long_ago.date(), **cell_key)
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    assert 'area' in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #13
    def test_local_export(self, celery, session):
        now = util.utcnow()
        today = now.date()
        long_ago = now - timedelta(days=367)
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellShardFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellShardFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
        # add one really old cell
        CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                         last_seen=long_ago.date(), **cell_key)
        session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(session, path, today)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = next(reader)
                    assert 'area' in header.values()

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    assert cells == exported_cells
Example #14
    def test_files(self):
        today = util.utcnow().date()
        rows = [
            dict(time=today, lat=12.345, lon=12.345),
            dict(time=today, lat=0, lon=12.345),
            dict(time=today, lat=-10.000, lon=-11.000),
        ]
        for row in rows:
            lat, lon = DataMap.scale(row["lat"], row["lon"])
            data = DataMap.shard_model(lat, lon)(grid=(lat, lon), created=row["time"], modified=row["time"])
            self.session.add(data)
        self.session.flush()

        lines = []
        rows = 0
        with util.selfdestruct_tempdir() as temp_dir:
            quaddir = os.path.join(temp_dir, "quadtrees")
            os.mkdir(quaddir)
            shapes = os.path.join(temp_dir, "shapes")
            tiles = os.path.join(temp_dir, "tiles")

            for shard_id, shard in DATAMAP_SHARDS.items():
                filename = "map_%s.csv.gz" % shard_id
                filepath = os.path.join(temp_dir, filename)
                result = export_file(None, filepath, shard.__tablename__, _db_rw=_make_db(), _session=self.session)

                if not result:
                    self.assertFalse(os.path.isfile(filepath))
                    continue

                rows += result
                with util.gzip_open(filepath, "r") as fd:
                    written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

                encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

                quadfolder = os.path.join(quaddir, "map_" + shard_id)
                self.assertTrue(os.path.isdir(quadfolder))
                self._check_quadtree(quadfolder)

            merge_files(quaddir, shapes, DATAMAPS_DIR)
            self._check_quadtree(shapes)

            render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
            self.assertEqual(sorted(os.listdir(tiles)), ["0", "1", "2"])
            self.assertEqual(sorted(os.listdir(os.path.join(tiles, "0", "0"))), ["0.png", "0@2x.png"])

        self.assertEqual(rows, 36)
        self.assertEqual(len(lines), 36)
        self.assertEqual(set([round(float(l[0]), 2) for l in lines]), set([-10.0, 0.0, 12.35]))
        self.assertEqual(set([round(float(l[1]), 2) for l in lines]), set([-11.0, 12.35]))
Example #15
    def test_local_export(self):
        cell_fixture_fields = (
            'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
        base_cell = CellFactory.build(radio=Radio.wcdma)
        cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                    'mnc': base_cell.mnc, 'lac': base_cell.lac}
        cells = set()

        for cid in range(190, 200):
            cell = dict(cid=cid, lat=base_cell.lat,
                        lon=base_cell.lon, **cell_key)
            CellFactory(**cell)
            cell['lat'] = '%.7f' % cell['lat']
            cell['lon'] = '%.7f' % cell['lon']

            cell['radio'] = 'UMTS'
            cell_strings = [
                (field, str(value)) for (field, value) in cell.items()]
            cell_tuple = tuple(sorted(cell_strings))
            cells.add(cell_tuple)

        # add one incomplete / unprocessed cell
        CellFactory(cid=210, lat=None, lon=None, **cell_key)
        self.session.commit()

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'export.csv.gz')
            write_stations_to_csv(self.session, path)

            with util.gzip_open(path, 'r') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    reader = csv.DictReader(gzip_file, CELL_FIELDS)

                    header = six.next(reader)
                    self.assertTrue('area' in header.values())
                    self.assertEqual(header, CELL_HEADER_DICT)

                    exported_cells = set()
                    for exported_cell in reader:
                        exported_cell_filtered = [
                            (field, value) for (field, value)
                            in exported_cell.items()
                            if field in cell_fixture_fields]
                        exported_cell = tuple(sorted(exported_cell_filtered))
                        exported_cells.add(exported_cell)

                    self.assertEqual(cells, exported_cells)
Example #16
    def get_csv(self, lo=1, hi=10, time=1408604686):
        cell = self.cell
        line_template = ('UMTS,{mcc},{mnc},{lac},{cid},{psc},{lon:.7f},'
                         '{lat:.7f},1,1,1,{time},{time},')
        lines = [
            line_template.format(mcc=cell.mcc,
                                 mnc=cell.mnc,
                                 lac=cell.lac,
                                 cid=i * 1010,
                                 psc='',
                                 lon=cell.lon + i * 0.002,
                                 lat=cell.lat + i * 0.001,
                                 time=time) for i in range(lo, hi)
        ]
        # add bad lines
        lines.append(
            line_template.format(
                mcc=cell.mcc,
                mnc=cell.mnc,
                lac='',
                cid='',
                psc=12,
                lon=cell.lon,
                lat=cell.lat,
                time=time,
            ))
        lines.append(
            line_template.format(
                mcc=cell.mcc,
                mnc=cell.mnc,
                lac='',
                cid='',
                psc='',
                lon=cell.lon,
                lat=cell.lat,
                time=time,
            ))
        txt = '\n'.join(lines)

        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, 'import.csv.gz')
            with util.gzip_open(path, 'w') as gzip_wrapper:
                with gzip_wrapper as gzip_file:
                    gzip_file.write(txt)
            yield path
Example #17
    def _export(self, session, datatype, expected_keys, restrict=False):
        with util.selfdestruct_tempdir() as temp_dir:
            path = os.path.join(temp_dir, datatype + ".tar.gz")
            if restrict:
                dump.dump_file(datatype,
                               session,
                               path,
                               lat=GB_LAT,
                               lon=GB_LON,
                               radius=25000)
            else:
                dump.dump_file(datatype, session, path)

            assert os.path.isfile(path)
            with util.gzip_open(path, "r") as fd:
                lines = fd.readlines()
                assert len(lines) == len(expected_keys) + 1
                for key in expected_keys:
                    assert [True for line in lines if key in line] == [True]
Example #18
    def test_main(self, raven):
        with util.selfdestruct_tempdir() as temp_dir:
            mock_generate = MagicMock()
            with patch.object(datamap, "generate", mock_generate):
                argv = [
                    "bin/location_map",
                    "--create",
                    "--upload",
                    "--concurrency=1",
                    "--output=%s" % temp_dir,
                ]
                main(argv, _raven_client=raven, _bucketname="bucket")

                assert len(mock_generate.mock_calls) == 1
                args, kw = mock_generate.call_args

                assert kw["concurrency"] == 1
                assert kw["output"] == temp_dir
                assert kw["upload"] is True
Example #19
    def test_main(self):
        with util.selfdestruct_tempdir() as temp_dir:
            mock_generate = MagicMock()
            with patch.object(datamap, "generate", mock_generate):
                argv = [
                    "bin/location_map",
                    "--create",
                    "--upload",
                    "--concurrency=1",
                    "--datamaps=%s/datamaps" % temp_dir,
                    "--output=%s" % temp_dir,
                ]
                main(argv, _raven_client=self.raven_client, _stats_client=self.stats_client)

                self.assertEqual(len(mock_generate.mock_calls), 1)
                args, kw = mock_generate.call_args

                self.assertEqual(kw["concurrency"], 1)
                self.assertEqual(kw["datamaps"], temp_dir + "/datamaps")
                self.assertEqual(kw["output"], temp_dir)
                self.assertEqual(kw["upload"], True)
Example #20
    def test_main(self, raven, stats):
        with util.selfdestruct_tempdir() as temp_dir:
            mock_generate = MagicMock()
            with patch.object(datamap, 'generate', mock_generate):
                argv = [
                    'bin/location_map',
                    '--create',
                    '--upload',
                    '--concurrency=1',
                    '--output=%s' % temp_dir,
                ]
                main(argv,
                     _raven_client=raven,
                     _stats_client=stats,
                     _bucketname='bucket')

                assert len(mock_generate.mock_calls) == 1
                args, kw = mock_generate.call_args

                assert kw['concurrency'] == 1
                assert kw['output'] == temp_dir
                assert kw['upload'] is True
Example #21
    def test_main(self):
        with util.selfdestruct_tempdir() as temp_dir:
            mock_generate = MagicMock()
            with patch.object(datamap, 'generate', mock_generate):
                argv = [
                    'bin/location_map',
                    '--create',
                    '--upload',
                    '--concurrency=1',
                    '--datamaps=%s/datamaps' % temp_dir,
                    '--output=%s' % temp_dir,
                ]
                main(argv,
                     _raven_client=self.raven_client,
                     _stats_client=self.stats_client)

                self.assertEqual(len(mock_generate.mock_calls), 1)
                args, kw = mock_generate.call_args

                self.assertEqual(kw['concurrency'], 1)
                self.assertEqual(kw['datamaps'], temp_dir + '/datamaps')
                self.assertEqual(kw['output'], temp_dir)
                self.assertEqual(kw['upload'], True)
Example #22
def generate(db_url, bucketname, raven_client, stats_client,
             upload=True, concurrency=2, max_zoom=13,
             datamaps='', output=None):  # pragma: no cover
    with util.selfdestruct_tempdir() as workdir:
        pool = billiard.Pool(processes=concurrency)

        if output:
            basedir = output
        else:
            basedir = workdir

        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        # Concurrently export datamap table to CSV files.
        csvdir = os.path.join(basedir, 'csv')
        if not os.path.isdir(csvdir):
            os.mkdir(csvdir)

        with stats_client.timed('datamaps', tags=['func:export']):
            result_rows = export_files(pool, db_url, csvdir)

        stats_client.timing('datamaps', result_rows, tags=['count:csv_rows'])

        # Concurrently create quadtrees out of CSV files.
        quaddir = os.path.join(basedir, 'quadtrees')
        if os.path.isdir(quaddir):
            shutil.rmtree(quaddir)
        os.mkdir(quaddir)

        with stats_client.timed('datamaps', tags=['func:encode']):
            quadtrees = encode_files(pool, csvdir, quaddir, datamaps)

        stats_client.timing('datamaps', quadtrees, tags=['count:quadtrees'])

        pool.close()
        pool.join()

        # Merge quadtrees and make points unique. This process cannot
        # be made concurrent.
        shapes = os.path.join(basedir, 'shapes')
        if os.path.isdir(shapes):
            shutil.rmtree(shapes)

        with stats_client.timed('datamaps', tags=['func:merge']):
            merge_files(quaddir, shapes, datamaps)

        # Render tiles, using xargs -P to get concurrency.
        tiles = os.path.abspath(os.path.join(basedir, 'tiles'))

        with stats_client.timed('datamaps', tags=['func:render']):
            render_tiles(shapes, tiles, concurrency, max_zoom,
                         datamaps, 'pngquant')

        if upload:
            # The upload process is largely network I/O bound, so we
            # can use more processes compared to the CPU bound tasks.
            pool = billiard.Pool(processes=concurrency * 2)

            with stats_client.timed('datamaps', tags=['func:upload']):
                result = upload_files(pool, bucketname, tiles, max_zoom)

            pool.close()
            pool.join()

            for metric, value in result.items():
                stats_client.timing('datamaps', value,
                                    tags=['count:%s' % metric])
Example #23
def temp_dir():
    with util.selfdestruct_tempdir() as temp_dir:
        yield temp_dir
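Used with pytest, this fixture hands each test a throwaway directory that is removed as soon as the test finishes. A minimal usage sketch (a hypothetical test; it assumes the function above is registered as a pytest fixture, for example decorated with @pytest.fixture in a conftest.py):

import os

def test_writes_into_temp_dir(temp_dir):
    # The directory exists for the duration of the test ...
    path = os.path.join(temp_dir, "scratch.csv.gz")
    with open(path, "wb") as fd:
        fd.write(b"payload")
    assert os.path.isfile(path)
    # ... and selfdestruct_tempdir deletes it once the fixture unwinds.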
Example #24
def main(_argv=None, _raven_client=None, _bucket_name=None):
    """
    Command-line entry point.

    :param _argv: Simulated sys.argv[1:] arguments for testing
    :param _raven_client: override Raven client for testing
    :param _bucket_name: override S3 bucket name for testing
    :return: A system exit code
    :rtype: int
    """

    # Parse the command line
    parser = get_parser()
    args = parser.parse_args(_argv)
    create = args.create
    upload = args.upload
    concurrency = args.concurrency
    verbose = args.verbose

    # Setup basic services
    if verbose:
        configure_logging(local_dev_env=True, logging_level="DEBUG")
    else:
        configure_logging()
    raven_client = configure_raven(
        transport="sync", tags={"app": "datamap"}, _client=_raven_client
    )

    # Check consistent output_dir, create, upload
    exit_early = 0
    output_dir = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        tiles_dir = os.path.join(output_dir, "tiles")
        if not create and not os.path.isdir(tiles_dir):
            LOG.error(
                "The tiles subfolder of the --output directory should already"
                " exist when calling --upload without --create, to avoid"
                " deleting files from the S3 bucket.",
                tiles_dir=tiles_dir,
            )
            exit_early = 1
    else:
        if create and not upload:
            LOG.error(
                "The --output argument is required with --create but without"
                " --upload, since the temporary folder is removed at exit."
            )
            exit_early = 1

        if upload and not create:
            LOG.error(
                "The --output argument is required with --upload but without"
                " --create, to avoid deleting all tiles in the S3 bucket."
            )
            exit_early = 1

    # Exit early with help message if error or nothing to do
    if exit_early or not (create or upload):
        parser.print_help()
        return exit_early

    # Determine the S3 bucket name
    bucket_name = _bucket_name
    if not _bucket_name:
        bucket_name = settings("asset_bucket")
        if bucket_name:
            bucket_name = bucket_name.strip("/")

    # Check that the implied credentials are authorized to use the bucket
    if upload:
        if not bucket_name:
            LOG.error("Unable to determine upload bucket_name.")
            return 1
        else:
            works, fail_msg = check_bucket(bucket_name)
            if not works:
                LOG.error(
                    f"Bucket {bucket_name} can not be used for uploads: {fail_msg}"
                )
                return 1

    # Generate and upload the tiles
    success = True
    interrupted = False
    result = {}
    try:
        with Timer() as timer:
            if output_dir:
                result = generate(
                    output_dir,
                    bucket_name,
                    raven_client,
                    create=create,
                    upload=upload,
                    concurrency=concurrency,
                )
            else:
                with util.selfdestruct_tempdir() as temp_dir:
                    result = generate(
                        temp_dir,
                        bucket_name,
                        raven_client,
                        create=create,
                        upload=upload,
                        concurrency=concurrency,
                    )
    except KeyboardInterrupt:
        interrupted = True
        success = False
    except Exception:
        raven_client.captureException()
        success = False
        raise
    finally:
        if create and upload:
            task = "generation and upload"
        elif create:
            task = "generation"
        else:
            task = "upload"
        if interrupted:
            complete = "interrupted"
        elif success:
            complete = "complete"
        else:
            complete = "failed"
        final_log = structlog.get_logger("canonical-log-line")
        final_log.info(
            f"Datamap tile {task} {complete} in {timer.duration_s:0.1f} seconds.",
            success=success,
            duration_s=timer.duration_s,
            script_name="ichnaea.scripts.datamap",
            create=create,
            upload=upload,
            concurrency=concurrency,
            bucket_name=bucket_name,
            **result,
        )

    return 0
Example #25
def generate(bucketname,
             raven_client,
             upload=True,
             concurrency=2,
             max_zoom=11,
             output=None):
    with util.selfdestruct_tempdir() as workdir:
        pool = billiard.Pool(processes=concurrency)

        if output:
            basedir = output
        else:
            basedir = workdir

        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        # Concurrently export datamap table to CSV files.
        csvdir = os.path.join(basedir, "csv")
        if not os.path.isdir(csvdir):
            os.mkdir(csvdir)

        with METRICS.timer("datamaps", tags=["func:export"]):
            result_rows = export_files(pool, csvdir)

        METRICS.timing("datamaps", result_rows, tags=["count:csv_rows"])

        # Concurrently create quadtrees out of CSV files.
        quaddir = os.path.join(basedir, "quadtrees")
        if os.path.isdir(quaddir):
            shutil.rmtree(quaddir)
        os.mkdir(quaddir)

        with METRICS.timer("datamaps", tags=["func:encode"]):
            quadtrees = encode_files(pool, csvdir, quaddir)

        METRICS.timing("datamaps", quadtrees, tags=["count:quadtrees"])

        pool.close()
        pool.join()

        # Merge quadtrees and make points unique. This process cannot
        # be made concurrent.
        shapes = os.path.join(basedir, "shapes")
        if os.path.isdir(shapes):
            shutil.rmtree(shapes)

        with METRICS.timer("datamaps", tags=["func:merge"]):
            merge_files(quaddir, shapes)

        # Render tiles, using xargs -P to get concurrency.
        tiles = os.path.abspath(os.path.join(basedir, "tiles"))

        with METRICS.timer("datamaps", tags=["func:render"]):
            render_tiles(shapes, tiles, concurrency, max_zoom)

        if upload:
            # The upload process is largely network I/O bound, so we
            # can use more processes compared to the CPU bound tasks.
            pool = billiard.Pool(processes=concurrency * 2)

            with METRICS.timer("datamaps", tags=["func:upload"]):
                result = upload_files(pool, bucketname, tiles, max_zoom,
                                      raven_client)

            pool.close()
            pool.join()

            for metric, value in result.items():
                METRICS.timing("datamaps", value, tags=["count:%s" % metric])