def test_files(self, session):
    today = util.utcnow().date()
    rows = [
        dict(time=today, lat=12.345, lon=12.345),
        dict(time=today, lat=0, lon=12.345),
        dict(time=today, lat=-10.000, lon=-11.000),
    ]
    for row in rows:
        lat, lon = DataMap.scale(row["lat"], row["lon"])
        data = DataMap.shard_model(lat, lon)(
            grid=(lat, lon), created=row["time"], modified=row["time"]
        )
        session.add(data)
        session.flush()

    lines = []
    rows = 0
    with util.selfdestruct_tempdir() as temp_dir:
        quaddir = os.path.join(temp_dir, "quadtrees")
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, "shapes")
        tiles = os.path.join(temp_dir, "tiles")

        for shard_id, shard in DataMap.shards().items():
            filename = "map_%s.csv.gz" % shard_id
            filepath = os.path.join(temp_dir, filename)
            result = export_file(filepath, shard.__tablename__, _session=session)

            if not result:
                assert not os.path.isfile(filepath)
                continue

            rows += result
            with util.gzip_open(filepath, "r") as fd:
                written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

            encode_file(filename, temp_dir, quaddir)

            quadfolder = os.path.join(quaddir, "map_" + shard_id)
            assert os.path.isdir(quadfolder)
            self._check_quadtree(quadfolder)

        merge_files(quaddir, shapes)
        self._check_quadtree(shapes)

        render_tiles(shapes, tiles, 1, 2)
        assert sorted(os.listdir(tiles)) == ["0", "1", "2"]
        assert sorted(os.listdir(os.path.join(tiles, "0", "0"))) == [
            "0.png",
            "0@2x.png",
        ]

    assert rows == 18
    assert len(lines) == 18

    lats = [round(float(line[0]), 2) for line in lines]
    longs = [round(float(line[1]), 2) for line in lines]
    assert set(lats) == set([-10.0, 0.0, 12.35])
    assert set(longs) == set([-11.0, 12.35])
def __call__(self, hourly=True, _bucket=None):
    if _bucket is None:
        bucket = settings("asset_bucket")
    else:
        bucket = _bucket

    if not bucket:
        return

    now = util.utcnow()
    today = now.date()
    start_time = None
    end_time = None

    if hourly:
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = "diff"
        start_time = end_time - timedelta(hours=1)
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = "full"

    filename = "MLS-%s-cell-export-" % file_type
    filename = filename + file_time.strftime("%Y-%m-%dT%H0000.csv.gz")

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, filename)

        with self.task.db_session(commit=False) as session:
            write_stations_to_csv(
                session, path, today, start_time=start_time, end_time=end_time
            )

        self.write_stations_to_s3(path, bucket)
def __call__(self, diff=True, _filename=None):
    url = self.settings.get('url')
    apikey = self.settings.get('apikey')
    if not url or not apikey:  # pragma: no cover
        return

    if _filename is None:
        if diff:
            prev_hour = util.utcnow() - timedelta(hours=1)
            _filename = prev_hour.strftime(
                'cell_towers_diff-%Y%m%d%H.csv.gz')
        else:  # pragma: no cover
            _filename = 'cell_towers.csv.gz'

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, _filename)

        with open(path, 'wb') as temp_file:
            with closing(
                    requests.get(url,
                                 params={
                                     'apiKey': apikey,
                                     'filename': _filename
                                 },
                                 stream=True)) as req:
                for chunk in req.iter_content(chunk_size=2**20):
                    temp_file.write(chunk)
                    temp_file.flush()

        with self.task.redis_pipeline() as pipe:
            with self.task.db_session() as session:
                self.import_stations(session, pipe, path)
def __call__(self, diff=True, _filename=None):
    url = self.settings.get('url')
    apikey = self.settings.get('apikey')
    if not url or not apikey:  # pragma: no cover
        return

    if _filename is None:
        if diff:
            prev_hour = util.utcnow() - timedelta(hours=1)
            _filename = prev_hour.strftime(
                'cell_towers_diff-%Y%m%d%H.csv.gz')
        else:  # pragma: no cover
            _filename = 'cell_towers.csv.gz'

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, _filename)

        with open(path, 'wb') as temp_file:
            with closing(requests.get(url,
                                      params={'apiKey': apikey,
                                              'filename': _filename},
                                      stream=True)) as req:
                for chunk in req.iter_content(chunk_size=2 ** 20):
                    temp_file.write(chunk)
                    temp_file.flush()

        with self.task.redis_pipeline() as pipe:
            with self.task.db_session() as session:
                self.import_stations(session, pipe, path)
def __call__(self, hourly=True, _bucket=None):
    if _bucket is None:  # pragma: no cover
        bucket = self.settings['bucket']
    else:
        bucket = _bucket

    if not bucket:  # pragma: no cover
        return

    now = util.utcnow()
    start_time = None
    end_time = None

    if hourly:
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = 'diff'
        start_time = end_time - timedelta(hours=1)
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = 'full'

    filename = 'MLS-%s-cell-export-' % file_type
    filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, filename)

        with self.task.db_session(commit=False) as session:
            write_stations_to_csv(
                session, path,
                start_time=start_time, end_time=end_time)

        self.write_stations_to_s3(path, bucket)
def get_csv(self, lo=1, hi=10, time=1408604686):
    cell = self.cell
    line_template = ('UMTS,{mcc},{mnc},{lac},{cid},{psc},{lon:.7f},'
                     '{lat:.7f},1,1,1,{time},{time},')
    lines = [line_template.format(
        mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac, cid=i * 1010,
        psc='', lon=cell.lon + i * 0.002,
        lat=cell.lat + i * 0.001, time=time)
        for i in range(lo, hi)]

    # add bad lines
    lines.append(line_template.format(
        mcc=cell.mcc, mnc=cell.mnc,
        lac='', cid='', psc=12,
        lon=cell.lon, lat=cell.lat, time=time,
    ))
    lines.append(line_template.format(
        mcc=cell.mcc, mnc=cell.mnc,
        lac='', cid='', psc='',
        lon=cell.lon, lat=cell.lat, time=time,
    ))
    txt = '\n'.join(lines)

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'import.csv.gz')
        with util.gzip_open(path, 'w') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                gzip_file.write(txt)
        yield path
def __call__(self, hourly=True, _bucket=None):
    if _bucket is None:  # pragma: no cover
        bucket = self.settings['bucket']
    else:
        bucket = _bucket

    if not bucket:  # pragma: no cover
        return

    now = util.utcnow()
    start_time = None
    end_time = None

    if hourly:
        end_time = now.replace(minute=0, second=0)
        file_time = end_time
        file_type = 'diff'
        start_time = end_time - timedelta(hours=1)
    else:
        file_time = now.replace(hour=0, minute=0, second=0)
        file_type = 'full'

    filename = 'MLS-%s-cell-export-' % file_type
    filename = filename + file_time.strftime('%Y-%m-%dT%H0000.csv.gz')

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, filename)

        with self.task.db_session(commit=False) as session:
            write_stations_to_csv(session, path,
                                  start_time=start_time, end_time=end_time)

        self.write_stations_to_s3(path, bucket)
def test_files(self, db_rw, session):
    today = util.utcnow().date()
    rows = [
        dict(time=today, lat=12.345, lon=12.345),
        dict(time=today, lat=0, lon=12.345),
        dict(time=today, lat=-10.000, lon=-11.000),
    ]
    for row in rows:
        lat, lon = DataMap.scale(row['lat'], row['lon'])
        data = DataMap.shard_model(lat, lon)(
            grid=(lat, lon), created=row['time'], modified=row['time'])
        session.add(data)
        session.flush()

    lines = []
    rows = 0
    db_url = str(db_rw.engine.url)
    with util.selfdestruct_tempdir() as temp_dir:
        quaddir = os.path.join(temp_dir, 'quadtrees')
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, 'shapes')
        tiles = os.path.join(temp_dir, 'tiles')

        for shard_id, shard in DataMap.shards().items():
            filename = 'map_%s.csv.gz' % shard_id
            filepath = os.path.join(temp_dir, filename)
            result = export_file(
                db_url, filepath,
                shard.__tablename__, _session=session)

            if not result:
                assert not os.path.isfile(filepath)
                continue

            rows += result
            with util.gzip_open(filepath, 'r') as fd:
                written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

            encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

            quadfolder = os.path.join(quaddir, 'map_' + shard_id)
            assert os.path.isdir(quadfolder)
            self._check_quadtree(quadfolder)

        merge_files(quaddir, shapes, DATAMAPS_DIR)
        self._check_quadtree(shapes)

        render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
        assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
        assert (sorted(os.listdir(os.path.join(tiles, '0', '0'))) ==
                ['0.png', '0@2x.png'])

    assert rows == 36
    assert len(lines) == 36
    assert (set([round(float(l[0]), 2) for l in lines]) ==
            set([-10.0, 0.0, 12.35]))
    assert (set([round(float(l[1]), 2) for l in lines]) ==
            set([-11.0, 12.35]))
def test_local_export(self, celery, session):
    now = util.utcnow()
    today = now.date()
    long_ago = now - timedelta(days=367)
    cell_fixture_fields = ("radio", "cid", "lat", "lon", "mnc", "mcc", "lac")
    base_cell = CellShardFactory.build(radio=Radio.wcdma)
    cell_key = {
        "radio": Radio.wcdma,
        "mcc": base_cell.mcc,
        "mnc": base_cell.mnc,
        "lac": base_cell.lac,
    }
    cells = set()

    for cid in range(190, 200):
        cell = dict(cid=cid, lat=base_cell.lat, lon=base_cell.lon, **cell_key)
        CellShardFactory(**cell)
        cell["lat"] = "%.7f" % cell["lat"]
        cell["lon"] = "%.7f" % cell["lon"]
        cell["radio"] = "UMTS"
        cell_strings = [(field, str(value)) for (field, value) in cell.items()]
        cell_tuple = tuple(sorted(cell_strings))
        cells.add(cell_tuple)

    # add one incomplete / unprocessed cell
    CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
    # add one really old cell
    CellShardFactory(
        cid=220,
        created=long_ago,
        modified=long_ago,
        last_seen=long_ago.date(),
        **cell_key,
    )
    session.commit()

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, "export.csv.gz")
        write_stations_to_csv(session, path, today)

        with util.gzip_open(path, "r") as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                reader = csv.DictReader(gzip_file, CELL_FIELDS)

                header = next(reader)
                assert "area" in header.values()

                exported_cells = set()
                for exported_cell in reader:
                    exported_cell_filtered = [
                        (field, value)
                        for (field, value) in exported_cell.items()
                        if field in cell_fixture_fields
                    ]
                    exported_cell = tuple(sorted(exported_cell_filtered))
                    exported_cells.add(exported_cell)

                assert cells == exported_cells
def test_files(self, db, session):  # pragma: no cover
    today = util.utcnow().date()
    rows = [
        dict(time=today, lat=12.345, lon=12.345),
        dict(time=today, lat=0, lon=12.345),
        dict(time=today, lat=-10.000, lon=-11.000),
    ]
    for row in rows:
        lat, lon = DataMap.scale(row['lat'], row['lon'])
        data = DataMap.shard_model(lat, lon)(grid=(lat, lon),
                                             created=row['time'],
                                             modified=row['time'])
        session.add(data)
        session.flush()

    lines = []
    rows = 0
    with util.selfdestruct_tempdir() as temp_dir:
        quaddir = os.path.join(temp_dir, 'quadtrees')
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, 'shapes')
        tiles = os.path.join(temp_dir, 'tiles')

        for shard_id, shard in DataMap.shards().items():
            filename = 'map_%s.csv.gz' % shard_id
            filepath = os.path.join(temp_dir, filename)
            result = export_file(filepath, shard.__tablename__,
                                 _session=session)

            if not result:
                assert not os.path.isfile(filepath)
                continue

            rows += result
            with util.gzip_open(filepath, 'r') as fd:
                written = fd.read()
                lines.extend([line.split(',') for line in written.split()])

            encode_file(filename, temp_dir, quaddir)

            quadfolder = os.path.join(quaddir, 'map_' + shard_id)
            assert os.path.isdir(quadfolder)
            self._check_quadtree(quadfolder)

        merge_files(quaddir, shapes)
        self._check_quadtree(shapes)

        render_tiles(shapes, tiles, 1, 2)
        assert (sorted(os.listdir(tiles)) == ['0', '1', '2'])
        assert (sorted(os.listdir(os.path.join(
            tiles, '0', '0'))) == ['0.png', '0@2x.png'])

    assert rows == 18
    assert len(lines) == 18
    assert (set([round(float(l[0]), 2) for l in lines]) ==
            set([-10.0, 0.0, 12.35]))
    assert (set([round(float(l[1]), 2) for l in lines]) ==
            set([-11.0, 12.35]))
def test_local_export(self):
    cell_fixture_fields = ('radio', 'cid', 'lat', 'lon',
                           'mnc', 'mcc', 'lac')
    base_cell = CellShardFactory.build(radio=Radio.wcdma)
    cell_key = {
        'radio': Radio.wcdma,
        'mcc': base_cell.mcc,
        'mnc': base_cell.mnc,
        'lac': base_cell.lac
    }
    cells = set()

    for cid in range(190, 200):
        cell = dict(cid=cid, lat=base_cell.lat,
                    lon=base_cell.lon, **cell_key)
        CellShardFactory(**cell)
        cell['lat'] = '%.7f' % cell['lat']
        cell['lon'] = '%.7f' % cell['lon']
        cell['radio'] = 'UMTS'
        cell_strings = [(field, str(value))
                        for (field, value) in cell.items()]
        cell_tuple = tuple(sorted(cell_strings))
        cells.add(cell_tuple)

    # add one incomplete / unprocessed cell
    CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
    self.session.commit()

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'export.csv.gz')
        write_stations_to_csv(self.session, path)

        with util.gzip_open(path, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                reader = csv.DictReader(gzip_file, CELL_FIELDS)

                header = six.next(reader)
                self.assertTrue('area' in header.values())

                exported_cells = set()
                for exported_cell in reader:
                    exported_cell_filtered = [
                        (field, value) for (field, value)
                        in exported_cell.items()
                        if field in cell_fixture_fields
                    ]
                    exported_cell = tuple(sorted(exported_cell_filtered))
                    exported_cells.add(exported_cell)

                self.assertEqual(cells, exported_cells)
def test_local_export(self, celery, session):
    now = util.utcnow()
    today = now.date()
    long_ago = now - timedelta(days=367)
    cell_fixture_fields = (
        'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
    base_cell = CellShardFactory.build(radio=Radio.wcdma)
    cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                'mnc': base_cell.mnc, 'lac': base_cell.lac}
    cells = set()

    for cid in range(190, 200):
        cell = dict(cid=cid, lat=base_cell.lat,
                    lon=base_cell.lon, **cell_key)
        CellShardFactory(**cell)
        cell['lat'] = '%.7f' % cell['lat']
        cell['lon'] = '%.7f' % cell['lon']
        cell['radio'] = 'UMTS'
        cell_strings = [
            (field, str(value)) for (field, value) in cell.items()]
        cell_tuple = tuple(sorted(cell_strings))
        cells.add(cell_tuple)

    # add one incomplete / unprocessed cell
    CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
    # add one really old cell
    CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                     last_seen=long_ago.date(), **cell_key)
    session.commit()

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'export.csv.gz')
        write_stations_to_csv(session, path, today)

        with util.gzip_open(path, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                reader = csv.DictReader(gzip_file, CELL_FIELDS)

                header = six.next(reader)
                assert 'area' in header.values()

                exported_cells = set()
                for exported_cell in reader:
                    exported_cell_filtered = [
                        (field, value) for (field, value)
                        in exported_cell.items()
                        if field in cell_fixture_fields]
                    exported_cell = tuple(sorted(exported_cell_filtered))
                    exported_cells.add(exported_cell)

                assert cells == exported_cells
def test_local_export(self, celery, session):
    now = util.utcnow()
    today = now.date()
    long_ago = now - timedelta(days=367)
    cell_fixture_fields = (
        'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
    base_cell = CellShardFactory.build(radio=Radio.wcdma)
    cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                'mnc': base_cell.mnc, 'lac': base_cell.lac}
    cells = set()

    for cid in range(190, 200):
        cell = dict(cid=cid, lat=base_cell.lat,
                    lon=base_cell.lon, **cell_key)
        CellShardFactory(**cell)
        cell['lat'] = '%.7f' % cell['lat']
        cell['lon'] = '%.7f' % cell['lon']
        cell['radio'] = 'UMTS'
        cell_strings = [
            (field, str(value)) for (field, value) in cell.items()]
        cell_tuple = tuple(sorted(cell_strings))
        cells.add(cell_tuple)

    # add one incomplete / unprocessed cell
    CellShardFactory(cid=210, lat=None, lon=None, **cell_key)
    # add one really old cell
    CellShardFactory(cid=220, created=long_ago, modified=long_ago,
                     last_seen=long_ago.date(), **cell_key)
    session.commit()

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'export.csv.gz')
        write_stations_to_csv(session, path, today)

        with util.gzip_open(path, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                reader = csv.DictReader(gzip_file, CELL_FIELDS)

                header = next(reader)
                assert 'area' in header.values()

                exported_cells = set()
                for exported_cell in reader:
                    exported_cell_filtered = [
                        (field, value) for (field, value)
                        in exported_cell.items()
                        if field in cell_fixture_fields]
                    exported_cell = tuple(sorted(exported_cell_filtered))
                    exported_cells.add(exported_cell)

                assert cells == exported_cells
def test_files(self):
    today = util.utcnow().date()
    rows = [
        dict(time=today, lat=12.345, lon=12.345),
        dict(time=today, lat=0, lon=12.345),
        dict(time=today, lat=-10.000, lon=-11.000),
    ]
    for row in rows:
        lat, lon = DataMap.scale(row["lat"], row["lon"])
        data = DataMap.shard_model(lat, lon)(grid=(lat, lon),
                                             created=row["time"],
                                             modified=row["time"])
        self.session.add(data)
        self.session.flush()

    lines = []
    rows = 0
    with util.selfdestruct_tempdir() as temp_dir:
        quaddir = os.path.join(temp_dir, "quadtrees")
        os.mkdir(quaddir)
        shapes = os.path.join(temp_dir, "shapes")
        tiles = os.path.join(temp_dir, "tiles")

        for shard_id, shard in DATAMAP_SHARDS.items():
            filename = "map_%s.csv.gz" % shard_id
            filepath = os.path.join(temp_dir, filename)
            result = export_file(None, filepath, shard.__tablename__,
                                 _db_rw=_make_db(), _session=self.session)

            if not result:
                self.assertFalse(os.path.isfile(filepath))
                continue

            rows += result
            with util.gzip_open(filepath, "r") as fd:
                written = fd.read()
                lines.extend([line.split(",") for line in written.split()])

            encode_file(filename, temp_dir, quaddir, DATAMAPS_DIR)

            quadfolder = os.path.join(quaddir, "map_" + shard_id)
            self.assertTrue(os.path.isdir(quadfolder))
            self._check_quadtree(quadfolder)

        merge_files(quaddir, shapes, DATAMAPS_DIR)
        self._check_quadtree(shapes)

        render_tiles(shapes, tiles, 1, 2, DATAMAPS_DIR, PNGQUANT)
        self.assertEqual(sorted(os.listdir(tiles)), ["0", "1", "2"])
        self.assertEqual(sorted(os.listdir(os.path.join(tiles, "0", "0"))),
                         ["0.png", "0@2x.png"])

    self.assertEqual(rows, 36)
    self.assertEqual(len(lines), 36)
    self.assertEqual(set([round(float(l[0]), 2) for l in lines]),
                     set([-10.0, 0.0, 12.35]))
    self.assertEqual(set([round(float(l[1]), 2) for l in lines]),
                     set([-11.0, 12.35]))
def test_local_export(self):
    cell_fixture_fields = (
        'radio', 'cid', 'lat', 'lon', 'mnc', 'mcc', 'lac')
    base_cell = CellFactory.build(radio=Radio.wcdma)
    cell_key = {'radio': Radio.wcdma, 'mcc': base_cell.mcc,
                'mnc': base_cell.mnc, 'lac': base_cell.lac}
    cells = set()

    for cid in range(190, 200):
        cell = dict(cid=cid, lat=base_cell.lat,
                    lon=base_cell.lon, **cell_key)
        CellFactory(**cell)
        cell['lat'] = '%.7f' % cell['lat']
        cell['lon'] = '%.7f' % cell['lon']
        cell['radio'] = 'UMTS'
        cell_strings = [
            (field, str(value)) for (field, value) in cell.items()]
        cell_tuple = tuple(sorted(cell_strings))
        cells.add(cell_tuple)

    # add one incomplete / unprocessed cell
    CellFactory(cid=210, lat=None, lon=None, **cell_key)
    self.session.commit()

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'export.csv.gz')
        write_stations_to_csv(self.session, path)

        with util.gzip_open(path, 'r') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                reader = csv.DictReader(gzip_file, CELL_FIELDS)

                header = six.next(reader)
                self.assertTrue('area' in header.values())
                self.assertEqual(header, CELL_HEADER_DICT)

                exported_cells = set()
                for exported_cell in reader:
                    exported_cell_filtered = [
                        (field, value) for (field, value)
                        in exported_cell.items()
                        if field in cell_fixture_fields]
                    exported_cell = tuple(sorted(exported_cell_filtered))
                    exported_cells.add(exported_cell)

                self.assertEqual(cells, exported_cells)
def get_csv(self, lo=1, hi=10, time=1408604686):
    cell = self.cell
    line_template = ('UMTS,{mcc},{mnc},{lac},{cid},{psc},{lon:.7f},'
                     '{lat:.7f},1,1,1,{time},{time},')
    lines = [
        line_template.format(mcc=cell.mcc, mnc=cell.mnc, lac=cell.lac,
                             cid=i * 1010, psc='',
                             lon=cell.lon + i * 0.002,
                             lat=cell.lat + i * 0.001,
                             time=time)
        for i in range(lo, hi)
    ]
    # add bad lines
    lines.append(
        line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc=12,
            lon=cell.lon, lat=cell.lat, time=time,
        ))
    lines.append(
        line_template.format(
            mcc=cell.mcc, mnc=cell.mnc,
            lac='', cid='', psc='',
            lon=cell.lon, lat=cell.lat, time=time,
        ))
    txt = '\n'.join(lines)

    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, 'import.csv.gz')
        with util.gzip_open(path, 'w') as gzip_wrapper:
            with gzip_wrapper as gzip_file:
                gzip_file.write(txt)
        yield path
def _export(self, session, datatype, expected_keys, restrict=False):
    with util.selfdestruct_tempdir() as temp_dir:
        path = os.path.join(temp_dir, datatype + ".tar.gz")
        if restrict:
            dump.dump_file(datatype, session, path,
                           lat=GB_LAT, lon=GB_LON, radius=25000)
        else:
            dump.dump_file(datatype, session, path)

        assert os.path.isfile(path)
        with util.gzip_open(path, "r") as fd:
            lines = fd.readlines()
            assert len(lines) == len(expected_keys) + 1
            for key in expected_keys:
                assert [True for line in lines if key in line] == [True]
def test_main(self, raven):
    with util.selfdestruct_tempdir() as temp_dir:
        mock_generate = MagicMock()
        with patch.object(datamap, "generate", mock_generate):
            argv = [
                "bin/location_map",
                "--create",
                "--upload",
                "--concurrency=1",
                "--output=%s" % temp_dir,
            ]
            main(argv, _raven_client=raven, _bucketname="bucket")

        assert len(mock_generate.mock_calls) == 1
        args, kw = mock_generate.call_args
        assert kw["concurrency"] == 1
        assert kw["output"] == temp_dir
        assert kw["upload"] is True
def test_main(self):
    with util.selfdestruct_tempdir() as temp_dir:
        mock_generate = MagicMock()
        with patch.object(datamap, "generate", mock_generate):
            argv = [
                "bin/location_map",
                "--create",
                "--upload",
                "--concurrency=1",
                "--datamaps=%s/datamaps" % temp_dir,
                "--output=%s" % temp_dir,
            ]
            main(argv,
                 _raven_client=self.raven_client,
                 _stats_client=self.stats_client)

        self.assertEqual(len(mock_generate.mock_calls), 1)
        args, kw = mock_generate.call_args
        self.assertEqual(kw["concurrency"], 1)
        self.assertEqual(kw["datamaps"], temp_dir + "/datamaps")
        self.assertEqual(kw["output"], temp_dir)
        self.assertEqual(kw["upload"], True)
def test_main(self, raven, stats):
    with util.selfdestruct_tempdir() as temp_dir:
        mock_generate = MagicMock()
        with patch.object(datamap, 'generate', mock_generate):
            argv = [
                'bin/location_map',
                '--create',
                '--upload',
                '--concurrency=1',
                '--output=%s' % temp_dir,
            ]
            main(argv,
                 _raven_client=raven,
                 _stats_client=stats,
                 _bucketname='bucket')

        assert len(mock_generate.mock_calls) == 1
        args, kw = mock_generate.call_args
        assert kw['concurrency'] == 1
        assert kw['output'] == temp_dir
        assert kw['upload'] is True
def test_main(self):
    with util.selfdestruct_tempdir() as temp_dir:
        mock_generate = MagicMock()
        with patch.object(datamap, 'generate', mock_generate):
            argv = [
                'bin/location_map',
                '--create',
                '--upload',
                '--concurrency=1',
                '--datamaps=%s/datamaps' % temp_dir,
                '--output=%s' % temp_dir,
            ]
            main(argv,
                 _raven_client=self.raven_client,
                 _stats_client=self.stats_client)

        self.assertEqual(len(mock_generate.mock_calls), 1)
        args, kw = mock_generate.call_args
        self.assertEqual(kw['concurrency'], 1)
        self.assertEqual(kw['datamaps'], temp_dir + '/datamaps')
        self.assertEqual(kw['output'], temp_dir)
        self.assertEqual(kw['upload'], True)
def generate(db_url, bucketname, raven_client, stats_client,
             upload=True, concurrency=2, max_zoom=13,
             datamaps='', output=None):  # pragma: no cover
    with util.selfdestruct_tempdir() as workdir:
        pool = billiard.Pool(processes=concurrency)

        if output:
            basedir = output
        else:
            basedir = workdir

        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        # Concurrently export datamap table to CSV files.
        csvdir = os.path.join(basedir, 'csv')
        if not os.path.isdir(csvdir):
            os.mkdir(csvdir)

        with stats_client.timed('datamaps', tags=['func:export']):
            result_rows = export_files(pool, db_url, csvdir)

        stats_client.timing('datamaps', result_rows,
                            tags=['count:csv_rows'])

        # Concurrently create quadtrees out of CSV files.
        quaddir = os.path.join(basedir, 'quadtrees')
        if os.path.isdir(quaddir):
            shutil.rmtree(quaddir)
        os.mkdir(quaddir)

        with stats_client.timed('datamaps', tags=['func:encode']):
            quadtrees = encode_files(pool, csvdir, quaddir, datamaps)

        stats_client.timing('datamaps', quadtrees,
                            tags=['count:quadtrees'])

        pool.close()
        pool.join()

        # Merge quadtrees and make points unique. This process cannot
        # be made concurrent.
        shapes = os.path.join(basedir, 'shapes')
        if os.path.isdir(shapes):
            shutil.rmtree(shapes)

        with stats_client.timed('datamaps', tags=['func:merge']):
            merge_files(quaddir, shapes, datamaps)

        # Render tiles, using xargs -P to get concurrency.
        tiles = os.path.abspath(os.path.join(basedir, 'tiles'))
        with stats_client.timed('datamaps', tags=['func:render']):
            render_tiles(shapes, tiles, concurrency, max_zoom,
                         datamaps, 'pngquant')

        if upload:
            # The upload process is largely network I/O bound, so we
            # can use more processes compared to the CPU bound tasks.
            pool = billiard.Pool(processes=concurrency * 2)

            with stats_client.timed('datamaps', tags=['func:upload']):
                result = upload_files(pool, bucketname, tiles, max_zoom)

            pool.close()
            pool.join()

            for metric, value in result.items():
                stats_client.timing('datamaps', value,
                                    tags=['count:%s' % metric])
def temp_dir():
    with util.selfdestruct_tempdir() as temp_dir:
        yield temp_dir
def main(_argv=None, _raven_client=None, _bucket_name=None):
    """
    Command-line entry point.

    :param _argv: Simulated sys.argv[1:] arguments for testing
    :param _raven_client: override Raven client for testing
    :param _bucket_name: override S3 bucket name for testing
    :return: A system exit code
    :rtype: int
    """
    # Parse the command line
    parser = get_parser()
    args = parser.parse_args(_argv)
    create = args.create
    upload = args.upload
    concurrency = args.concurrency
    verbose = args.verbose

    # Setup basic services
    if verbose:
        configure_logging(local_dev_env=True, logging_level="DEBUG")
    else:
        configure_logging()
    raven_client = configure_raven(
        transport="sync", tags={"app": "datamap"}, _client=_raven_client
    )

    # Check consistent output_dir, create, upload
    exit_early = 0
    output_dir = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        tiles_dir = os.path.join(output_dir, "tiles")
        if not create and not os.path.isdir(tiles_dir):
            LOG.error(
                "The tiles subfolder of the --output directory should already"
                " exist when calling --upload without --create, to avoid"
                " deleting files from the S3 bucket.",
                tiles_dir=tiles_dir,
            )
            exit_early = 1
    else:
        if create and not upload:
            LOG.error(
                "The --output argument is required with --create but without"
                " --upload, since the temporary folder is removed at exit."
            )
            exit_early = 1

        if upload and not create:
            LOG.error(
                "The --output argument is required with --upload but without"
                " --create, to avoid deleting all tiles in the S3 bucket."
            )
            exit_early = 1

    # Exit early with help message if error or nothing to do
    if exit_early or not (create or upload):
        parser.print_help()
        return exit_early

    # Determine the S3 bucket name
    bucket_name = _bucket_name
    if not _bucket_name:
        bucket_name = settings("asset_bucket")
        if bucket_name:
            bucket_name = bucket_name.strip("/")

    # Check that the implied credentials are authorized to use the bucket
    if upload:
        if not bucket_name:
            LOG.error("Unable to determine upload bucket_name.")
            return 1
        else:
            works, fail_msg = check_bucket(bucket_name)
            if not works:
                LOG.error(
                    f"Bucket {bucket_name} can not be used for uploads: {fail_msg}"
                )
                return 1

    # Generate and upload the tiles
    success = True
    interrupted = False
    result = {}
    try:
        with Timer() as timer:
            if output_dir:
                result = generate(
                    output_dir,
                    bucket_name,
                    raven_client,
                    create=create,
                    upload=upload,
                    concurrency=concurrency,
                )
            else:
                with util.selfdestruct_tempdir() as temp_dir:
                    result = generate(
                        temp_dir,
                        bucket_name,
                        raven_client,
                        create=create,
                        upload=upload,
                        concurrency=concurrency,
                    )
    except KeyboardInterrupt:
        interrupted = True
        success = False
    except Exception:
        raven_client.captureException()
        success = False
        raise
    finally:
        if create and upload:
            task = "generation and upload"
        elif create:
            task = "generation"
        else:
            task = "upload"

        if interrupted:
            complete = "interrupted"
        elif success:
            complete = "complete"
        else:
            complete = "failed"

        final_log = structlog.get_logger("canonical-log-line")
        final_log.info(
            f"Datamap tile {task} {complete} in {timer.duration_s:0.1f} seconds.",
            success=success,
            duration_s=timer.duration_s,
            script_name="ichnaea.scripts.datamap",
            create=create,
            upload=upload,
            concurrency=concurrency,
            bucket_name=bucket_name,
            **result,
        )

    return 0
def generate(bucketname, raven_client, upload=True,
             concurrency=2, max_zoom=11, output=None):
    with util.selfdestruct_tempdir() as workdir:
        pool = billiard.Pool(processes=concurrency)

        if output:
            basedir = output
        else:
            basedir = workdir

        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        # Concurrently export datamap table to CSV files.
        csvdir = os.path.join(basedir, "csv")
        if not os.path.isdir(csvdir):
            os.mkdir(csvdir)

        with METRICS.timer("datamaps", tags=["func:export"]):
            result_rows = export_files(pool, csvdir)

        METRICS.timing("datamaps", result_rows, tags=["count:csv_rows"])

        # Concurrently create quadtrees out of CSV files.
        quaddir = os.path.join(basedir, "quadtrees")
        if os.path.isdir(quaddir):
            shutil.rmtree(quaddir)
        os.mkdir(quaddir)

        with METRICS.timer("datamaps", tags=["func:encode"]):
            quadtrees = encode_files(pool, csvdir, quaddir)

        METRICS.timing("datamaps", quadtrees, tags=["count:quadtrees"])

        pool.close()
        pool.join()

        # Merge quadtrees and make points unique. This process cannot
        # be made concurrent.
        shapes = os.path.join(basedir, "shapes")
        if os.path.isdir(shapes):
            shutil.rmtree(shapes)

        with METRICS.timer("datamaps", tags=["func:merge"]):
            merge_files(quaddir, shapes)

        # Render tiles, using xargs -P to get concurrency.
        tiles = os.path.abspath(os.path.join(basedir, "tiles"))
        with METRICS.timer("datamaps", tags=["func:render"]):
            render_tiles(shapes, tiles, concurrency, max_zoom)

        if upload:
            # The upload process is largely network I/O bound, so we
            # can use more processes compared to the CPU bound tasks.
            pool = billiard.Pool(processes=concurrency * 2)

            with METRICS.timer("datamaps", tags=["func:upload"]):
                result = upload_files(pool, bucketname, tiles,
                                      max_zoom, raven_client)

            pool.close()
            pool.join()

            for metric, value in result.items():
                METRICS.timing("datamaps", value, tags=["count:%s" % metric])