# Example 1
def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for the
    specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
    extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db
    """
    get_session = database.init_db(config["database"], config["search_index"])
    results = []
    with get_session() as session:
        # Convert every postal code of the constraint into its area,
        # deduplicating areas along the way.
        areas = {
            data_files.french_postal_codes_to_iso_3166(postal_code)
            for postal_code in constraint["postal_codes"]
        }
        # Fetch the rows of the model matching each area.
        for area in areas:
            matching_rows = session.query(model).filter(model.area == area).all()
            results.extend(matching_rows)
        # Detach the loaded objects from the session so they stay usable
        # after the session is closed.
        session.expunge_all()
    return results
# Example 2
def preprocess_data(config, force=False):
    """
    Ensures that all the necessary data have been inserted in db from the raw
    opendata files.

    :params config: A config dictionary.
    :params force: Whether to force rebuild or not.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        # The database is considered built when both tables hold rows.
        has_transport = session.query(PublicTransport).count() > 0
        has_postal_codes = session.query(PostalCode).count() > 0
        if has_transport and has_postal_codes and not force:
            # Nothing to do, the data is already there.
            return
        # Purge any stale rows before rebuilding from scratch.
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Run every preprocessing step and store its output, each in its own
    # session.
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError("Error with %s." %
                                                     preprocess.__name__)
        with get_session() as session:
            session.add_all(data_objects)
# Example 3
def get_app(config):
    """
    Get a Bottle app instance with all the routes set-up.

    :param config: A config dictionary.
    :return: The built bottle app.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    app = bottle.default_app()
    # Inject a database session and the config into route callbacks.
    app.install(DatabasePlugin(get_session))
    app.install(ConfigPlugin(config))
    app.config.setdefault("canister.log_level", logging.root.level)
    app.config.setdefault("canister.log_path", None)
    app.config.setdefault("canister.debug", False)
    app.install(canister.Canister())
    # Use DateAwareJSONEncoder to dump JSON strings
    # From http://stackoverflow.com/questions/21282040/bottle-framework-how-to-return-datetime-in-json-response#comment55718456_21282666.  pylint: disable=locally-disabled,line-too-long
    # Install on ``app`` directly rather than via the module-level
    # ``bottle.install``: ``app`` *is* the default app, so behavior is
    # unchanged, but this avoids relying on global state and matches how
    # every other plugin above is installed.
    app.install(
        bottle.JSONPlugin(
            json_dumps=functools.partial(json.dumps, cls=DateAwareJSONEncoder)
        )
    )

    # API v1 routes
    app.route("/api/v1/", "GET", api_routes.index_v1)

    app.route("/api/v1/time_to_places", "GET",
              api_routes.time_to_places_v1)

    app.route("/api/v1/flats", "GET", api_routes.flats_v1)
    app.route("/api/v1/flats/status/:status", "GET",
              api_routes.flats_by_status_v1)

    app.route("/api/v1/flat/:flat_id", "GET", api_routes.flat_v1)
    app.route("/api/v1/flat/:flat_id/status", "POST",
              api_routes.update_flat_status_v1)
    app.route("/api/v1/flat/:flat_id/notes", "POST",
              api_routes.update_flat_notes_v1)
    app.route("/api/v1/flat/:flat_id/notation", "POST",
              api_routes.update_flat_notation_v1)

    app.route("/api/v1/search", "POST", api_routes.search_v1)

    # Index
    app.route("/", "GET", lambda: _serve_static_file("index.html"))

    # Static files
    app.route("/favicon.ico", "GET",
              lambda: _serve_static_file("favicon.ico"))
    app.route(
        "/assets/<filename:path>", "GET",
        lambda filename: _serve_static_file("/assets/{}".format(filename))
    )
    app.route(
        "/img/<filename:path>", "GET",
        lambda filename: _serve_static_file("/img/{}".format(filename))
    )

    return app
# Example 4
def load_flats_from_db(config):
    """
    Load flats from database.

    :param config: A config dict.
    :return: A dict mapping constraint in config to all available matching
    flats.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    flats_by_constraint = collections.defaultdict(list)
    with get_session() as session:
        # Group every stored flat under the constraint it was fetched for,
        # serialized through its JSON API representation.
        all_flats = session.query(flat_model.Flat).all()
        for flat in all_flats:
            constraint_name = flat.flatisfy_constraint
            flats_by_constraint[constraint_name].append(flat.json_api_repr())
    return flats_by_constraint
# Example 5
def purge_db(config):
    """
    Purge the database.

    :param config: A config dict.
    :return: ``None``
    """
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        LOGGER.info("Purge all flats from the database.")
        # Delete flats one by one (slower than a bulk delete) so that the
        # whoosh index is kept in sync.
        all_flats = session.query(flat_model.Flat).all()
        for flat in all_flats:
            session.delete(flat)
        # Postal codes and public transports can be bulk-deleted.
        LOGGER.info("Purge all postal codes from the database.")
        session.query(postal_code_model.PostalCode).delete()
        LOGGER.info("Purge all public transportations from the database.")
        session.query(public_transport_model.PublicTransport).delete()
# Example 6
def get_app(config):
    """
    Get a Bottle app instance with all the routes set-up.

    :param config: A config dictionary.
    :return: The built bottle app.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    app = bottle.Bottle()
    # Inject a database session and the config into route callbacks.
    app.install(DatabasePlugin(get_session))
    app.install(ConfigPlugin(config))
    # Only set defaults so that user-provided canister settings (if any)
    # take precedence.
    # NOTE(review): "DISABLED" / False presumably turn canister logging
    # off — confirm against canister's documentation.
    app.config.setdefault("canister.log_level", "DISABLED")
    app.config.setdefault("canister.log_path", False)
    app.config.setdefault("canister.debug", False)
    app.install(canister.Canister())
    # Use DateAwareJSONEncoder to dump JSON strings
    # From http://stackoverflow.com/questions/21282040/bottle-framework-how-to-return-datetime-in-json-response#comment55718456_21282666.  pylint: disable=locally-disabled,line-too-long
    app.install(
        bottle.JSONPlugin(json_dumps=functools.partial(
            json.dumps, cls=DateAwareJSONEncoder)))

    # Enable CORS
    @app.hook("after_request")
    def enable_cors():
        """
        Add CORS headers at each request.
        """
        # The str() call is required as we import unicode_literal and WSGI
        # headers list should have plain str type.
        bottle.response.headers[str("Access-Control-Allow-Origin")] = str("*")
        bottle.response.headers[str("Access-Control-Allow-Methods")] = str(
            "PUT, GET, POST, DELETE, OPTIONS, PATCH")
        bottle.response.headers[str("Access-Control-Allow-Headers")] = str(
            "Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token")

    # API v1 routes
    # Every API route also accepts OPTIONS so CORS preflight requests
    # get a response (headers are added by the hook above).
    app.route("/api/v1", ["GET", "OPTIONS"], api_routes.index_v1)

    app.route("/api/v1/time_to_places", ["GET", "OPTIONS"],
              api_routes.time_to_places_v1)

    app.route("/api/v1/flats", ["GET", "OPTIONS"], api_routes.flats_v1)
    app.route("/api/v1/flats/:flat_id", ["GET", "OPTIONS"], api_routes.flat_v1)
    app.route("/api/v1/flats/:flat_id", ["PATCH", "OPTIONS"],
              api_routes.update_flat_v1)

    app.route("/api/v1/ics/visits.ics", ["GET", "OPTIONS"],
              api_routes.ics_feed_v1)

    app.route("/api/v1/search", ["POST", "OPTIONS"], api_routes.search_v1)

    app.route("/api/v1/opendata", ["GET", "OPTIONS"],
              api_routes.opendata_index_v1)
    app.route(
        "/api/v1/opendata/postal_codes",
        ["GET", "OPTIONS"],
        api_routes.opendata_postal_codes_v1,
    )

    app.route("/api/v1/metadata", ["GET", "OPTIONS"], api_routes.metadata_v1)
    app.route("/api/v1/import", ["GET", "OPTIONS"], api_routes.import_v1)

    # Index
    app.route("/", "GET", lambda: _serve_static_file("index.html"))

    # Static files
    app.route("/favicon.ico", "GET", lambda: _serve_static_file("favicon.ico"))
    app.route(
        "/assets/<filename:path>",
        "GET",
        lambda filename: _serve_static_file("/assets/{}".format(filename)),
    )
    app.route(
        "/img/<filename:path>",
        "GET",
        lambda filename: _serve_static_file("/img/{}".format(filename)),
    )
    app.route(
        "/.well-known/<filename:path>",
        "GET",
        lambda filename: _serve_static_file("/.well-known/{}".format(filename)
                                            ),
    )
    # User-uploaded / downloaded flat images are served from the data
    # directory, not from the bundled static assets.
    app.route(
        "/data/img/<filename:path>",
        "GET",
        lambda filename: bottle.static_file(
            filename, root=os.path.join(config["data_directory"], "images")),
    )

    return app
# Example 7
def import_and_filter(config, load_from_db=False, new_only=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using Woob.
    :param new_only: Whether to restrict filtering to flats not already
        present in the database.
    :return: The list of ids of the new flats found.
    """
    # Fetch and filter flats list
    # Flats already in db are always loaded: they serve both as the data
    # source when load_from_db is set and as the "already seen" reference
    # when new_only is set.
    past_flats = fetch.load_flats_from_db(config)
    if load_from_db:
        fetched_flats = past_flats
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=(not load_from_db),
        past_flats=past_flats if new_only else {},
    )
    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    # Flats with status "new" found during this run, and their ids
    # (the returned value).
    new_flats = []
    result = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        # Set is_expired to true for all existing flats.
        # This will be set back to false if we find them during importing.
        for flat in session.query(flat_model.Flat).all():
            flat.is_expired = True

        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {flat_dict["id"]: flat_model.Flat.from_dict(flat_dict) for flat_dict in flats_list}

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(flat_model.FlatStatus, status)
                    else:
                        flat_object.status = each.status

                    # Every flat we fetched isn't expired
                    flat_object.is_expired = False

                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it.
                    # pop() also removes it from flats_objects, so only
                    # genuinely new flats remain for the add_all below.
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)
                    result.append(flat.id)

            session.add_all(flats_objects.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

    LOGGER.info(f"Found {len(result)} new flats.")

    # Touch a file to indicate last update timestamp
    ts_file = os.path.join(config["data_directory"], "timestamp")
    with open(ts_file, "w"):
        os.utime(ts_file, None)

    LOGGER.info("Done!")
    return result
# Example 8
def import_and_filter(config, load_from_db=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using WebOOB.
    :return: ``None``.
    """
    # Fetch and filter flats list
    if load_from_db:
        fetched_flats = fetch.load_flats_from_db(config)
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(config,
                                           fetched_flats=fetched_flats,
                                           fetch_details=(not load_from_db))
    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    # Flats with status "new" found during this run, used for the email
    # notification below.
    new_flats = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys()))
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(flat_model.FlatStatus,
                                                     status)
                    else:
                        flat_object.status = each.status
                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it.
                    # pop() also removes it from flats_objects, so only
                    # genuinely new flats remain for the add_all below.
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)

            session.add_all(flats_objects.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

    LOGGER.info("Done!")