def load_data(model, constraint, config):
    """
    Load data of the specified model from the database. Only load data for
    the specific areas of the postal codes in config.

    :param model: SQLAlchemy model to load.
    :param constraint: A constraint from configuration to limit the spatial
        extension of the loaded data.
    :param config: A config dictionary.
    :returns: A list of loaded SQLAlchemy objects from the db
    """
    get_session = database.init_db(config["database"], config["search_index"])

    results = []
    with get_session() as session:
        # Map every postal code in the constraint to its area, deduplicating
        # areas so each one is only queried once.
        areas = {
            data_files.french_postal_codes_to_iso_3166(postal_code)
            for postal_code in constraint["postal_codes"]
        }
        # Load the matching objects for every distinct area
        for area in areas:
            results.extend(
                session.query(model).filter(model.area == area).all()
            )
        # Expunge loaded data from the session to be able to use them
        # afterwards
        session.expunge_all()
    return results
def preprocess_data(config, force=False):
    """
    Ensures that all the necessary data have been inserted in db from the raw
    opendata files.

    :params config: A config dictionary.
    :params force: Whether to force rebuild or not.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    # Check if a build is required: both opendata tables must already be
    # populated, otherwise we rebuild everything from scratch.
    with get_session() as session:
        has_transports = session.query(PublicTransport).count() > 0
        has_postal_codes = session.query(PostalCode).count() > 0
        if has_transports and has_postal_codes and not force:
            # No need to rebuild the database, skip
            return
        # Otherwise, purge all existing data
        session.query(PublicTransport).delete()
        session.query(PostalCode).delete()

    # Build all opendata files; each preprocessing function yields the
    # objects to insert, an empty result is treated as a build failure.
    for preprocess in data_files.PREPROCESSING_FUNCTIONS:
        data_objects = preprocess()
        if not data_objects:
            raise flatisfy.exceptions.DataBuildError(
                "Error with %s." % preprocess.__name__
            )
        with get_session() as session:
            session.add_all(data_objects)
def get_app(config):
    """
    Get a Bottle app instance with all the routes set-up.

    :param config: A config dictionary.
    :return: The built bottle app.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    app = bottle.default_app()
    app.install(DatabasePlugin(get_session))
    app.install(ConfigPlugin(config))
    app.config.setdefault("canister.log_level", logging.root.level)
    app.config.setdefault("canister.log_path", None)
    app.config.setdefault("canister.debug", False)
    app.install(canister.Canister())

    # Use DateAwareJSONEncoder to dump JSON strings
    # From http://stackoverflow.com/questions/21282040/bottle-framework-how-to-return-datetime-in-json-response#comment55718456_21282666. pylint: disable=locally-disabled,line-too-long
    # Install on ``app`` directly rather than via module-level
    # ``bottle.install`` — they are equivalent here since ``app`` is the
    # default app, but being explicit is consistent with every other
    # ``app.install`` call in this function.
    app.install(
        bottle.JSONPlugin(
            json_dumps=functools.partial(json.dumps, cls=DateAwareJSONEncoder)
        )
    )

    # API v1 routes
    app.route("/api/v1/", "GET", api_routes.index_v1)
    app.route("/api/v1/time_to_places", "GET", api_routes.time_to_places_v1)
    app.route("/api/v1/flats", "GET", api_routes.flats_v1)
    app.route("/api/v1/flats/status/:status", "GET",
              api_routes.flats_by_status_v1)
    app.route("/api/v1/flat/:flat_id", "GET", api_routes.flat_v1)
    app.route("/api/v1/flat/:flat_id/status", "POST",
              api_routes.update_flat_status_v1)
    app.route("/api/v1/flat/:flat_id/notes", "POST",
              api_routes.update_flat_notes_v1)
    app.route("/api/v1/flat/:flat_id/notation", "POST",
              api_routes.update_flat_notation_v1)
    app.route("/api/v1/search", "POST", api_routes.search_v1)

    # Index
    app.route("/", "GET", lambda: _serve_static_file("index.html"))

    # Static files
    app.route("/favicon.ico", "GET",
              lambda: _serve_static_file("favicon.ico"))
    app.route(
        "/assets/<filename:path>",
        "GET",
        lambda filename: _serve_static_file("/assets/{}".format(filename))
    )
    app.route(
        "/img/<filename:path>",
        "GET",
        lambda filename: _serve_static_file("/img/{}".format(filename))
    )

    return app
def load_flats_from_db(config):
    """
    Load flats from database.

    :param config: A config dict.
    :return: A dict mapping constraint in config to all available matching
        flats.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    # Group the flats by the constraint they were fetched for
    flats_by_constraint = collections.defaultdict(list)

    with get_session() as session:
        all_flats = session.query(flat_model.Flat).all()
        for flat in all_flats:
            flats_by_constraint[flat.flatisfy_constraint].append(
                flat.json_api_repr()
            )

    return flats_by_constraint
def purge_db(config):
    """
    Purge the database.

    :param config: A config dict.
    :return: ``None``
    """
    get_session = database.init_db(config["database"], config["search_index"])

    with get_session() as session:
        LOGGER.info("Purge all flats from the database.")
        # Flats are deleted one object at a time — slower than a bulk
        # DELETE, but required so the whoosh search index stays in sync.
        for flat in session.query(flat_model.Flat).all():
            session.delete(flat)

        LOGGER.info("Purge all postal codes from the database.")
        session.query(postal_code_model.PostalCode).delete()

        LOGGER.info("Purge all public transportations from the database.")
        session.query(public_transport_model.PublicTransport).delete()
def get_app(config):
    """
    Get a Bottle app instance with all the routes set-up.

    :return: The built bottle app.
    """
    get_session = database.init_db(config["database"], config["search_index"])

    app = bottle.Bottle()
    app.install(DatabasePlugin(get_session))
    app.install(ConfigPlugin(config))
    app.config.setdefault("canister.log_level", "DISABLED")
    app.config.setdefault("canister.log_path", False)
    app.config.setdefault("canister.debug", False)
    app.install(canister.Canister())

    # Use DateAwareJSONEncoder to dump JSON strings
    # From http://stackoverflow.com/questions/21282040/bottle-framework-how-to-return-datetime-in-json-response#comment55718456_21282666. pylint: disable=locally-disabled,line-too-long
    app.install(
        bottle.JSONPlugin(
            json_dumps=functools.partial(json.dumps, cls=DateAwareJSONEncoder)
        )
    )

    # Enable CORS
    @app.hook("after_request")
    def enable_cors():
        """
        Add CORS headers at each request.
        """
        # The str() call is required as we import unicode_literal and WSGI
        # headers list should have plain str type.
        headers = bottle.response.headers
        headers[str("Access-Control-Allow-Origin")] = str("*")
        headers[str("Access-Control-Allow-Methods")] = str(
            "PUT, GET, POST, DELETE, OPTIONS, PATCH"
        )
        headers[str("Access-Control-Allow-Headers")] = str(
            "Origin, Accept, Content-Type, X-Requested-With, X-CSRF-Token"
        )

    # API v1 routes, each registered with OPTIONS as well so that CORS
    # preflight requests are answered. Registration order is preserved.
    api_v1_routes = [
        ("/api/v1", "GET", api_routes.index_v1),
        ("/api/v1/time_to_places", "GET", api_routes.time_to_places_v1),
        ("/api/v1/flats", "GET", api_routes.flats_v1),
        ("/api/v1/flats/:flat_id", "GET", api_routes.flat_v1),
        ("/api/v1/flats/:flat_id", "PATCH", api_routes.update_flat_v1),
        ("/api/v1/ics/visits.ics", "GET", api_routes.ics_feed_v1),
        ("/api/v1/search", "POST", api_routes.search_v1),
        ("/api/v1/opendata", "GET", api_routes.opendata_index_v1),
        ("/api/v1/opendata/postal_codes", "GET",
         api_routes.opendata_postal_codes_v1),
        ("/api/v1/metadata", "GET", api_routes.metadata_v1),
        ("/api/v1/import", "GET", api_routes.import_v1),
    ]
    for path, method, callback in api_v1_routes:
        app.route(path, [method, "OPTIONS"], callback)

    # Index
    app.route("/", "GET", lambda: _serve_static_file("index.html"))

    # Static files, served from the bundled assets. The prefix is bound as a
    # lambda default argument to avoid the late-binding closure pitfall.
    app.route("/favicon.ico", "GET",
              lambda: _serve_static_file("favicon.ico"))
    for prefix in ("assets", "img", ".well-known"):
        app.route(
            "/{}/<filename:path>".format(prefix),
            "GET",
            lambda filename, prefix=prefix: _serve_static_file(
                "/{}/{}".format(prefix, filename)
            ),
        )

    # Downloaded flat images, served from the data directory
    app.route(
        "/data/img/<filename:path>",
        "GET",
        lambda filename: bottle.static_file(
            filename, root=os.path.join(config["data_directory"], "images")
        ),
    )

    return app
def import_and_filter(config, load_from_db=False, new_only=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using Woob.
    :param new_only: Whether to restrict processing to flats that were not
        already in the database (``past_flats`` is then passed to the filter
        step so known flats are skipped).
    :return: A list of the ids of the new flats that were found.
    """
    # Fetch and filter flats list
    past_flats = fetch.load_flats_from_db(config)
    if load_from_db:
        fetched_flats = past_flats
    else:
        fetched_flats = fetch.fetch_flats(config)
    # Do not fetch additional details if we loaded data from the db.
    flats_by_status = filter_fetched_flats(
        config,
        fetched_flats=fetched_flats,
        fetch_details=(not load_from_db),
        past_flats=past_flats if new_only else {},
    )

    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    new_flats = []
    result = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        # Set is_expired to true for all existing flats.
        # This will be set back to false if we find them during importing.
        for flat in session.query(flat_model.Flat).all():
            flat.is_expired = True

        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_objects = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_objects:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats_queries = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_objects.keys())
                )
                for each in existing_flats_queries.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    flat_object = flats_objects[each.id]
                    if each.status in flat_model.AUTOMATED_STATUSES:
                        flat_object.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        flat_object.status = each.status

                    # Every flat we fetched isn't expired
                    flat_object.is_expired = False

                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it
                    session.merge(flats_objects.pop(each.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_objects.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)
                    result.append(flat.id)

            session.add_all(flats_objects.values())

        if config["send_email"]:
            email.send_notification(config, new_flats)

        LOGGER.info(f"Found {len(result)} new flats.")

    # Touch a file to indicate last update timestamp
    ts_file = os.path.join(config["data_directory"], "timestamp")
    with open(ts_file, "w"):
        os.utime(ts_file, None)

    LOGGER.info("Done!")
    return result
def import_and_filter(config, load_from_db=False):
    """
    Fetch the available flats list. Then, filter it according to criteria.
    Finally, store it in the database.

    :param config: A config dict.
    :param load_from_db: Whether to load flats from database or fetch them
        using WebOOB.
    :return: ``None``.
    """
    # Fetch and filter flats list.
    # Do not fetch additional details if we loaded data from the db.
    fetched_flats = (
        fetch.load_flats_from_db(config)
        if load_from_db
        else fetch.fetch_flats(config)
    )
    flats_by_status = filter_fetched_flats(
        config, fetched_flats=fetched_flats, fetch_details=(not load_from_db)
    )

    # Create database connection
    get_session = database.init_db(config["database"], config["search_index"])

    new_flats = []

    LOGGER.info("Merging fetched flats in database...")
    # Flatten the flats_by_status dict: regroup every flat sharing a status,
    # whatever constraint it came from.
    flatten_flats_by_status = collections.defaultdict(list)
    for flats in flats_by_status.values():
        for status, flats_list in flats.items():
            flatten_flats_by_status[status].extend(flats_list)

    with get_session() as session:
        for status, flats_list in flatten_flats_by_status.items():
            # Build SQLAlchemy Flat model objects for every available flat
            flats_by_id = {
                flat_dict["id"]: flat_model.Flat.from_dict(flat_dict)
                for flat_dict in flats_list
            }

            if flats_by_id:
                # If there are some flats, try to merge them with the ones in
                # db
                existing_flats = session.query(flat_model.Flat).filter(
                    flat_model.Flat.id.in_(flats_by_id.keys())
                )
                for existing_flat in existing_flats.all():
                    # For each flat to merge, take care not to overwrite the
                    # status if the user defined it
                    merged_flat = flats_by_id[existing_flat.id]
                    if existing_flat.status in flat_model.AUTOMATED_STATUSES:
                        merged_flat.status = getattr(
                            flat_model.FlatStatus, status
                        )
                    else:
                        merged_flat.status = existing_flat.status
                    # For each flat already in the db, merge it (UPDATE)
                    # instead of adding it
                    session.merge(flats_by_id.pop(existing_flat.id))

            # For any other flat, it is not already in the database, so we can
            # just set the status field without worrying
            for flat in flats_by_id.values():
                flat.status = getattr(flat_model.FlatStatus, status)
                if flat.status == flat_model.FlatStatus.new:
                    new_flats.append(flat)

            session.add_all(flats_by_id.values())

    if config["send_email"]:
        email.send_notification(config, new_flats)

    LOGGER.info("Done!")