Example #1
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(task, session, pipe,
                             cell_type=datatype)(filename=filename)
Example #2
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                None, session, pipe,
                cell_type=datatype,
                update_area_task=update_area)(filename=filename)
Example #3
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                task, cell_type=datatype)(pipe, session, filename=filename)
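All three variants above instantiate ocid.ImportLocal and then call the instance, keeping construction-time options (the task, the cell_type) separate from per-run arguments (the filename). A rough, hypothetical sketch of that callable-object shape, not the actual ImportLocal implementation:

class ImportLocalSketch:
    """Hypothetical stand-in showing the construct-then-call pattern."""

    def __init__(self, task, session=None, pipe=None, cell_type=None):
        # construction-time configuration
        self.task = task
        self.session = session
        self.pipe = pipe
        self.cell_type = cell_type

    def __call__(self, filename=None):
        # per-run work: read the file and write rows via session/pipe
        ...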
Example #4
def main(argv, _db=None):
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=(
            "Import from public cell data into a local dev environment. "
            "See https://location.services.mozilla.com/downloads"),
    )
    parser.add_argument("filename", help="Path to the csv.gz import file.")

    args = parser.parse_args(argv[1:])

    if not settings("local_dev_env"):
        print("This script can only be run in a local dev environment.")
        print("Set LOCAL_DEV_ENV=True in your environment.")
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if not os.path.isfile(filename):
        print("File %s not found." % filename)
        return 1

    configure_logging()
    celery_app = get_eager_celery_app()
    init_worker(celery_app)
    cellarea_queue = celery_app.data_queues["update_cellarea"]

    with db_worker_session(celery_app.db, commit=False) as session:
        with gzip_open(filename, "r") as file_handle:
            read_stations_from_csv(session, file_handle,
                                   celery_app.redis_client, cellarea_queue)
    return 0
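main() above takes the full argv list (argv[0] becomes the program name, the rest is parsed) and returns an exit code. A minimal, hypothetical entry point that would drive it, assuming the function lives in an importable script module:

# Hypothetical wrapper: pass sys.argv through and use the integer return
# value of main() as the process exit status.
import sys

if __name__ == "__main__":
    sys.exit(main(sys.argv))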
Example #5
    def db_session(self, commit=True):
        """
        Returns a database session usable as a context manager.

        :param commit: Should the session be committed or aborted at the end?
        :type commit: bool
        """
        return db_worker_session(self.app.db_rw, commit=commit)
Example #6
    def db_session(self, commit=True, isolation_level=None):
        """
        Returns a database session usable as a context manager.

        :param commit: Should the session be committed or aborted at the end?
        :type commit: bool
        :param isolation_level: Set a new transaction isolation level for this session
        """
        return db_worker_session(
            self.app.db, commit=commit, isolation_level=isolation_level
        )
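The docstrings in Examples #5 and #6 describe the object returned by db_worker_session as a context manager that is either committed or aborted at the end. Its implementation is not shown on this page; the following is only a sketch written from those docstrings, with the session factory and rollback behaviour as assumptions:

from contextlib import contextmanager


@contextmanager
def db_worker_session_sketch(db, commit=True):
    # Assumed shape, not the actual ichnaea helper: open a session from
    # the database object, commit on success (when commit=True), roll
    # back on error, and always close the session.
    session = db.session()  # assumption: db exposes a session factory
    try:
        yield session
        if commit:
            session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()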
Example #7
def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(prog=argv[0],
                                     description='Dump/export data.')
    parser.add_argument('--datatype',
                        required=True,
                        help='Type of the data file, blue, cell or wifi')
    parser.add_argument('--filename',
                        required=True,
                        help='Path to the csv.gz export file.')
    parser.add_argument('--lat',
                        default=None,
                        help='The center latitude of the desired area.')
    parser.add_argument('--lon',
                        default=None,
                        help='The center longitude of the desired area.')
    parser.add_argument('--radius',
                        default=None,
                        help='The radius of the desired area.')

    args = parser.parse_args(argv[1:])
    if not args.filename:  # pragma: no cover
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):  # pragma: no cover
        print('File already exists.')
        return 1

    datatype = args.datatype
    if datatype not in ('blue', 'cell', 'wifi'):  # pragma: no cover
        print('Unknown data type.')
        return 1

    lat, lon, radius = (None, None, None)
    if (args.lat is not None and args.lon is not None
            and args.radius is not None):
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db('ro', _db=_db)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(datatype,
                               session,
                               filename,
                               lat=lat,
                               lon=lon,
                               radius=radius)
    return exit_code
Example #8
def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(prog=argv[0],
                                     description="Dump/export data.")
    parser.add_argument("--datatype",
                        required=True,
                        help="Type of the data file, blue, cell or wifi")
    parser.add_argument("--filename",
                        required=True,
                        help="Path to the csv.gz export file.")
    parser.add_argument("--lat",
                        default=None,
                        help="The center latitude of the desired area.")
    parser.add_argument("--lon",
                        default=None,
                        help="The center longitude of the desired area.")
    parser.add_argument("--radius",
                        default=None,
                        help="The radius of the desired area.")

    args = parser.parse_args(argv[1:])
    if not args.filename:
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):
        print("File already exists.")
        return 1

    datatype = args.datatype
    if datatype not in ("blue", "cell", "wifi"):
        print("Unknown data type.")
        return 1

    lat, lon, radius = (None, None, None)
    if args.lat is not None and args.lon is not None and args.radius is not None:
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db("ro", _db=_db, pool=False)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(datatype,
                               session,
                               filename,
                               lat=lat,
                               lon=lon,
                               radius=radius)
    return exit_code
Example #9
def show_api_key_details(ctx, key):
    """Print api key details to stdout."""
    db = configure_db("rw")
    with db_worker_session(db) as session:
        row = session.query(ApiKey).filter(
            ApiKey.valid_key == key).one_or_none()
        if row:
            api_key = Key.from_obj(row)
        else:
            api_key = None

    if api_key:
        table = [[name, value] for name, value in api_key.as_dict().items()]
        print_table(table, delimiter=" : ", stream_write=click_echo_no_nl)
    else:
        click.echo(f"API key '{key}' does not exist")
Example #10
def show_api_key_details(key):
    """Print api key details to stdout."""
    db = configure_db("rw")
    with db_worker_session(db) as session:
        columns = ApiKey.__table__.columns
        fields = [getattr(columns, f) for f in API_KEY_COLUMN_NAMES]
        row = (session.execute(
            select(fields).where(columns.valid_key == key))).fetchone()
        if row is not None:
            key = Key(**dict(row.items()))
        else:
            key = None
    table = []
    for field in API_KEY_COLUMN_NAMES:
        table.append([field, getattr(key, field, "")])

    print_table(table, " : ")
Example #11
def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #12
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text("""\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " "))

    db = configure_db("ro", _db=_db, pool=False)
    min_grid = b""
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, "w", compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #13
def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
'''.format(tablename=tablename).replace('\n', ' '))

    db = configure_db('ro', transport='sync', _db=_db)
    min_grid = b''
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows
Example #14
def create_api_key(key):
    """Create a new api key."""
    key = key or str(uuid.uuid4())

    db = configure_db("rw")
    with db_worker_session(db) as session:
        try:
            session.execute(
                insert(ApiKey.__table__).values(
                    valid_key=key,
                    allow_fallback=False,
                    allow_locate=True,
                    allow_region=True,
                    store_sample_locate=100,
                    store_sample_submit=100,
                ))
            print("Created API key: %r" % key)
        except IntegrityError:
            print("API key %r exists" % key)
Example #15
def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(
        prog=argv[0], description='Dump/export data.')
    parser.add_argument('--datatype', required=True,
                        help='Type of the data file, blue, cell or wifi')
    parser.add_argument('--filename', required=True,
                        help='Path to the csv.gz export file.')
    parser.add_argument('--lat', default=None,
                        help='The center latitude of the desired area.')
    parser.add_argument('--lon', default=None,
                        help='The center longitude of the desired area.')
    parser.add_argument('--radius', default=None,
                        help='The radius of the desired area.')

    args = parser.parse_args(argv[1:])
    if not args.filename:  # pragma: no cover
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):  # pragma: no cover
        print('File already exists.')
        return 1

    datatype = args.datatype
    if datatype not in ('blue', 'cell', 'wifi'):  # pragma: no cover
        print('Unknown data type.')
        return 1

    lat, lon, radius = (None, None, None)
    if (args.lat is not None and
            args.lon is not None and args.radius is not None):
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db('ro', transport='sync', _db=_db)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(
            datatype, session, filename, lat=lat, lon=lon, radius=radius)
    return exit_code
Example #16
def export_file(db_url, filename, tablename, _db_rw=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db(db_url, _db=_db_rw)

    offset = 0
    limit = 200000

    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.engine.pool.dispose()
    return result_rows
Example #17
def list_api_keys(ctx):
    """List all api keys in db."""
    show_fields = [
        "valid_key", "allow_fallback", "allow_locate", "allow_region"
    ]

    db = configure_db("rw")
    with db_worker_session(db) as session:
        columns = ApiKey.__table__.columns
        fields = [getattr(columns, f) for f in show_fields]
        rows = session.execute(select(fields)).fetchall()

    click.echo("%d api keys." % len(rows))
    if rows:
        # Add header row
        table = [show_fields]
        # Add rest of the rows; the columns are in the order of show_fields so we
        # don't have to do any re-ordering
        table.extend(rows)
        print_table(table, stream_write=click_echo_no_nl)
Example #18
def create_api_key(ctx, maxreq, key):
    """Create a new api key.

    If KEY is not specified, it uses a uuid4.

    """
    key = key or str(uuid.uuid4())

    db = configure_db("rw")
    with db_worker_session(db) as session:
        try:
            session.execute(
                insert(ApiKey.__table__).values(
                    valid_key=key,
                    maxreq=maxreq,
                    allow_fallback=False,
                    allow_locate=True,
                    allow_region=True,
                    store_sample_locate=100,
                    store_sample_submit=100,
                ))
            click.echo("Created API key: %r" % key)
        except IntegrityError:
            click.echo("API key %r exists" % key)
Example #19
def generate(db, bucketname, heka_client, stats_client,
             upload=True, concurrency=2, datamaps='', output=None):
    datamaps_encode = os.path.join(datamaps, 'encode')
    datamaps_enumerate = os.path.join(datamaps, 'enumerate')
    datamaps_render = os.path.join(datamaps, 'render')

    with tempdir() as workdir:
        csv = os.path.join(workdir, 'map.csv')

        with stats_client.timer("datamaps.export_to_csv"):
            with db_worker_session(db) as session:
                result_rows = export_to_csv(session, csv)

        stats_client.timing('datamaps.csv_rows', result_rows)

        # create shapefile / quadtree
        shapes = os.path.join(workdir, 'shapes')
        cmd = '{encode} -z15 -o {output} {input}'.format(
            encode=datamaps_encode,
            output=shapes,
            input=csv)

        with stats_client.timer("datamaps.encode"):
            system_call(cmd)

        # render tiles
        if output:
            tiles = output
        else:
            tiles = os.path.join(workdir, 'tiles')
        cmd = ("{enumerate} -z{zoom} {shapes} | xargs -L1 -P{concurrency} "
               "sh -c 'mkdir -p {output}/$2/$3; {render} "
               "-B 12:0.0379:0.874 -c0088FF -t0 "
               "-O 16:1600:1.5 -G 0.5{extra} $1 $2 $3 $4 | "
               "pngquant --speed=3 --quality=65-95 32 > "
               "{output}/$2/$3/$4{suffix}.png' dummy")

        zoom_0_cmd = cmd.format(
            enumerate=datamaps_enumerate,
            zoom=0,
            shapes=shapes,
            concurrency=concurrency,
            render=datamaps_render,
            output=tiles,
            extra=' -T 512',
            suffix='@2x')

        # create high-res version for zoom level 0
        system_call(zoom_0_cmd)

        zoom_all_cmd = cmd.format(
            enumerate=datamaps_enumerate,
            zoom=13,
            shapes=shapes,
            concurrency=concurrency,
            render=datamaps_render,
            output=tiles,
            extra='',
            suffix='')

        with stats_client.timer("datamaps.render"):
            system_call(zoom_all_cmd)

        if upload:  # pragma: no cover
            with stats_client.timer("datamaps.upload_to_s3"):
                result = upload_to_s3(bucketname, tiles)

            for metric, value in result.items():
                stats_client.timing('datamaps.%s' % metric, value)
Example #20
    def db_session(self):
        # returns a context manager
        return db_worker_session(self.app.db_master)
Example #21
def main(
    ping_connections=False,
    _db=None,
    _geoip_db=None,
    _http_session=None,
    _raven_client=None,
    _redis_client=None,
    _position_searcher=None,
    _region_searcher=None,
):
    """
    Configure the web app stored in :data:`ichnaea.webapp.app._APP`.

    Does connection, logging and view config setup. Attaches some
    additional functionality to the :class:`pyramid.registry.Registry`
    instance.

    At startup ping all outbound connections like the database
    once, to ensure they are actually up and responding.

    The parameters starting with an underscore are test-only hooks
    to provide pre-configured connection objects.

    :param ping_connections: If True, ping and test outside connections.
    :type ping_connections: bool

    :returns: A configured WSGI app, the result of calling
              :meth:`pyramid.config.Configurator.make_wsgi_app`.
    """

    configure_logging()

    config = Configurator()
    check_config()

    # add support for pt templates
    config.include("pyramid_chameleon")

    # add a config setting to skip logging for some views
    config.registry.skip_logging = set()

    configure_api(config)
    configure_content(config)
    configure_monitor(config)

    # configure outside connections
    registry = config.registry

    registry.db = configure_db("ro", _db=_db)

    registry.raven_client = raven_client = configure_raven(
        transport="gevent", tags={"app": "webapp"}, _client=_raven_client
    )

    registry.redis_client = redis_client = configure_redis(_client=_redis_client)

    configure_stats()

    registry.http_session = configure_http_session(_session=_http_session)

    registry.geoip_db = geoip_db = configure_geoip(
        raven_client=raven_client, _client=_geoip_db
    )

    # Needs to be the exact same as the *_incoming entries in taskapp.config.
    registry.data_queues = data_queues = {
        "update_incoming": DataQueue(
            "update_incoming", redis_client, "report", batch=100, compress=True
        )
    }

    for name, func, default in (
        ("position_searcher", configure_position_searcher, _position_searcher),
        ("region_searcher", configure_region_searcher, _region_searcher),
    ):
        searcher = func(
            geoip_db=geoip_db,
            raven_client=raven_client,
            redis_client=redis_client,
            data_queues=data_queues,
            _searcher=default,
        )
        setattr(registry, name, searcher)

    config.add_tween("ichnaea.db.db_tween_factory", under=EXCVIEW)
    config.add_tween("ichnaea.log.log_tween_factory", under=EXCVIEW)
    config.add_request_method(db_session, property=True)

    # freeze skip logging set
    config.registry.skip_logging = frozenset(config.registry.skip_logging)

    # Should we try to initialize and establish the outbound connections?
    if ping_connections:
        with db_worker_session(registry.db, commit=False) as session:
            ping_session(session)
        registry.redis_client.ping()

    return config.make_wsgi_app()
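The docstring notes that main() returns the result of make_wsgi_app(). A hedged sketch of serving that app locally with the standard library; the import path is a guess based on the ichnaea.webapp.app reference in the docstring:

# Assumed usage only: build the configured WSGI app and serve it with
# wsgiref for local experimentation. Adjust the import to wherever
# main() actually lives in the project.
from wsgiref.simple_server import make_server

from ichnaea.webapp.app import main  # assumed module path

application = main(ping_connections=False)
with make_server("127.0.0.1", 8080, application) as httpd:
    httpd.serve_forever()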
Example #22
def main(global_config, heka_config=None, init=False,
         _db_master=None, _db_slave=None, _heka_client=None, _redis=None,
         _stats_client=None, **settings):
    config = Configurator(settings=settings)

    # add support for pt templates
    config.include('pyramid_chameleon')

    settings = config.registry.settings

    from ichnaea.content.views import configure_content
    from ichnaea.logging import configure_heka
    from ichnaea.logging import configure_stats
    from ichnaea.service import configure_service

    configure_content(config)
    configure_service(config)

    # configure databases incl. test override hooks
    if _db_master is None:
        config.registry.db_master = Database(settings['db_master'])
    else:
        config.registry.db_master = _db_master
    if _db_slave is None:
        config.registry.db_slave = Database(settings['db_slave'])
    else:
        config.registry.db_slave = _db_slave

    if _redis is None:
        config.registry.redis_client = None
        if 'redis_url' in settings:
            config.registry.redis_client = redis_client(settings['redis_url'])
    else:
        config.registry.redis_client = _redis

    config.registry.geoip_db = configure_geoip(config.registry.settings)

    if _heka_client is None:
        config.registry.heka_client = configure_heka(heka_config)
    else:
        config.registry.heka_client = _heka_client

    config.registry.stats_client = configure_stats(
        settings.get('statsd_host'), _client=_stats_client)

    config.add_tween('ichnaea.db.db_tween_factory', under=EXCVIEW)
    config.add_tween('ichnaea.logging.log_tween_factory', under=EXCVIEW)
    config.add_request_method(db_master_session, property=True)
    config.add_request_method(db_slave_session, property=True)

    # replace json renderer with custom json variant
    config.add_renderer('json', customjson.Renderer())

    # Should we try to initialize and establish the outbound connections?
    if init:
        # Test the slave DB connection
        with db_worker_session(config.registry.db_slave) as session:
            try:
                session.execute(select([func.now()])).first()
            except OperationalError:
                # Let the instance start, so it can recover / reconnect
                # to the DB later, but provide degraded service in the
                # meantime.
                pass

        # Test the redis connection
        try:
            config.registry.redis_client.ping()
        except ConnectionError:
            # Same as for the DB, continue with degraded service.
            pass

    return config.make_wsgi_app()
Example #23
def export_to_csv(filename, csv_dir, tablename, row_limit=None, file_limit=None):
    """
    Export a datamap table to a CSV file.

    :param filename: An output file ending in .csv
    :param csv_dir: The output directory
    :param tablename: The name of the datamap table to export
    :param row_limit: The number of rows to fetch at a time
    :param file_limit: The number of output rows before rotating files
    :return: A tuple (rows exported, files created)

    Each database row is turned into 0 to 6 similar CSV rows by
    random_points(), based on how recently they were recorded.

    If file_limit is not reached, the output file will be the filename.
    If file_limit is reached, the output files will have a serial number and
    be based on the filename. For example, "map.csv" will become "map_0001.csv",
    "map_0002.csv", etc.
    """
    stmt = text(
        """\
SELECT
`grid`, CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(
            tablename=tablename
        ).replace(
            "\n", " "
        )
    )

    db = configure_db("ro", pool=False)
    min_grid = b""
    row_limit = row_limit or 200_000
    file_limit = file_limit or 10_000_000

    result_rows = 0
    file_path = os.path.join(csv_dir, filename)
    fd = open(file_path, "w")
    file_count = 1
    file_rows = 0
    orig_filename = filename
    orig_file_path = file_path
    assert filename.endswith(".csv")
    try:
        with db_worker_session(db, commit=False) as session:
            while True:
                result = session.execute(
                    stmt.bindparams(limit=row_limit, grid=min_grid)
                )
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)

                # Rotate the file when needed
                file_rows += len(lines)
                if file_rows >= file_limit:
                    fd.close()
                    file_count += 1
                    file_rows = 0
                    filename = "sub" + orig_filename.replace(
                        ".csv", f"_{file_count:04}.csv"
                    )
                    file_path = os.path.join(csv_dir, filename)
                    fd = open(file_path, "w")

                min_grid = rows[-1].grid
    finally:
        fd.close()

    if not file_rows:
        os.remove(file_path)
        file_count -= 1

    if file_count > 1:
        # Rename first file to serial CSV format
        filename = "sub" + orig_filename.replace(".csv", "_0001.csv")
        file_path = os.path.join(csv_dir, filename)
        os.rename(orig_file_path, file_path)

    db.close()
    return result_rows, file_count
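export_to_csv above rotates output once a single file reaches file_limit rows and returns the totals. A hypothetical invocation with placeholder arguments (the directory and table name are not values taken from this page):

# Hypothetical call: export the given datamap table into csv_dir as
# map.csv, letting the function rotate to serially numbered files if
# the per-file row limit is exceeded.
rows, files = export_to_csv(
    "map.csv",              # base output filename; must end in .csv
    "/tmp/datamap",         # existing output directory (placeholder)
    "datamap_ne",           # datamap table name (placeholder)
    row_limit=200_000,      # rows fetched per query batch
    file_limit=10_000_000,  # rows per output file before rotating
)
print("exported %d rows into %d file(s)" % (rows, files))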
Example #24
    def db_session(self, commit=True):
        # returns a context manager
        return db_worker_session(self.app.db_rw, commit=commit)
Example #25
def main(global_config,
         heka_config=None,
         init=False,
         _db_master=None,
         _db_slave=None,
         _heka_client=None,
         _redis=None,
         _stats_client=None,
         **settings):
    config = Configurator(settings=settings)

    # add support for pt templates
    config.include('pyramid_chameleon')

    settings = config.registry.settings

    from ichnaea.content.views import configure_content
    from ichnaea.logging import configure_heka
    from ichnaea.logging import configure_stats
    from ichnaea.service import configure_service

    configure_content(config)
    configure_service(config)

    # configure databases incl. test override hooks
    if _db_master is None:
        config.registry.db_master = Database(settings['db_master'])
    else:
        config.registry.db_master = _db_master
    if _db_slave is None:
        config.registry.db_slave = Database(settings['db_slave'])
    else:
        config.registry.db_slave = _db_slave

    if _redis is None:
        config.registry.redis_client = None
        if 'redis_url' in settings:
            config.registry.redis_client = redis_client(settings['redis_url'])
    else:
        config.registry.redis_client = _redis

    if _heka_client is None:  # pragma: no cover
        config.registry.heka_client = heka_client = configure_heka(heka_config)
    else:
        config.registry.heka_client = heka_client = _heka_client

    config.registry.stats_client = configure_stats(settings.get('statsd_host'),
                                                   _client=_stats_client)

    config.registry.geoip_db = configure_geoip(config.registry.settings,
                                               heka_client=heka_client)

    config.add_tween('ichnaea.db.db_tween_factory', under=EXCVIEW)
    config.add_tween('ichnaea.logging.log_tween_factory', under=EXCVIEW)
    config.add_request_method(db_master_session, property=True)
    config.add_request_method(db_slave_session, property=True)

    # replace json renderer with custom json variant
    config.add_renderer('json', customjson.Renderer())

    # Should we try to initialize and establish the outbound connections?
    if init:  # pragma: no cover
        # Test the slave DB connection
        with db_worker_session(config.registry.db_slave) as session:
            try:
                session.execute(select([func.now()])).first()
            except OperationalError:
                # Let the instance start, so it can recover / reconnect
                # to the DB later, but provide degraded service in the
                # meantime.
                pass

        # Test the redis connection
        try:
            config.registry.redis_client.ping()
        except ConnectionError:
            # Same as for the DB, continue with degraded service.
            pass

    return config.make_wsgi_app()