class PropertyService(plugins.Plugin):
    directions_service = injection.Dependency()
    property_repository = injection.Dependency()

    def find(self, max_price=None, favorite=None, area=None, limit=None):
        try:
            props = list(
                self.property_repository.find(
                    max_price=max_price, favorite=favorite, area=area, limit=limit
                )
            )
        except exceptions.InvalidQuery as err:
            raise ValueError(str(err)) from err
        return props

    async def to_work(self, prop_id: str, mode: str, refresh: bool = False):
        prop = self.property_repository.get(prop_id)
        if prop.toWork and not refresh:
            return prop.toWork
        origin = prop.location
        route = await self.directions_service.to_work(origin, mode)
        prop = prop.replace(toWork=route)
        self.property_repository.update(prop)
        return route

    def favorite(self, prop_id: str, val: bool):
        prop = self.property_repository.get(prop_id)
        if prop.favorite != val:
            prop = prop.replace(favorite=val)
            self.property_repository.update(prop)

    def ban(self, prop_id: str, val: bool):
        prop = self.property_repository.get(prop_id)
        if prop.banned != val:
            prop = prop.replace(banned=val)
            self.property_repository.update(prop)

    def clear_properties(self, banned=False, favorites=False):
        self.property_repository.clear(banned=banned, favorites=favorites)

    def save_search_area(self, name: str, geojson):
        self.property_repository.set_search_area(name, geojson)

    def get_search_areas(self):
        return self.property_repository.get_search_areas()
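# The prop.replace(...) calls above assume Property is an immutable record
# whose replace() returns an updated copy rather than mutating in place. A
# minimal sketch of that contract using dataclasses.replace; the project's
# real Property model may be implemented differently.
import dataclasses
from typing import Optional


@dataclasses.dataclass(frozen=True)
class _PropertySketch:  # hypothetical name, for illustration only
    favorite: bool = False
    banned: bool = False
    toWork: Optional[dict] = None

    def replace(self, **changes) -> "_PropertySketch":
        # Return a new instance with the given fields overridden.
        return dataclasses.replace(self, **changes)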
class AuthService(injection.Component):
    user_repository = injection.Dependency()

    def register(self, username, password):
        if not (isinstance(password, str) and len(password) >= 4):
            raise ValueError("Password should be a string of 4+ characters.")
        password = generate_password_hash(password)
        user = User(username=username, password=password)
        self.user_repository.add_user(user)

    def get_tokens(self, username, password):
        if not self._are_credentials_valid(username, password):
            return None
        return {
            "access_token": create_access_token(identity=username),
            "refresh_token": create_refresh_token(identity=username),
            "username": username,
        }

    def _are_credentials_valid(self, username, password):
        try:
            user = self.user_repository.get_user(username)
        except exceptions.EntityNotFound:
            return False
        return user.is_password_valid(password)
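# A minimal sketch of the User model assumed by AuthService above. The real
# model lives elsewhere in the project; is_password_valid is presumed to
# delegate to werkzeug's check_password_hash, the counterpart of the
# generate_password_hash call in register().
from dataclasses import dataclass

from werkzeug.security import check_password_hash


@dataclass(frozen=True)
class User:
    username: str
    password: str  # the salted hash, never the plain-text password

    def is_password_valid(self, password: str) -> bool:
        return check_password_hash(self.password, password)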
class MemoryConfiguration(DefaultConfiguration):
    """Canned config which is merged with the default config."""

    config_overrides = injection.Dependency()

    def load_usercfg(self):
        return self.config_overrides
class CribPipeline(base.WithInjection):
    property_repository = injection.Dependency()

    def process_item(self, item, spider):
        if item["existing"]:
            self.property_repository.update(item["prop"])
        else:
            self.property_repository.insert(item["prop"])
        return item  # Scrapy pipelines must return the item for later stages
class ScrapeService(plugins.Plugin):
    _scrape = injection.Dependency("scrape")

    def scrape(self, search):
        spider = "rightmove"
        settings = {
            "RIGHTMOVE_SEARCHES": [search],
        }
        self._scrape.crawl(spider, loglevel="INFO", settings_override=settings)
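# A hedged sketch of what the injected "scrape" component's crawl() might look
# like, built on Scrapy's public CrawlerProcess API. The crawl name and the
# loglevel/settings_override parameters come from the call above; everything
# else here is an assumption, not the project's actual wiring.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def crawl(spider: str, loglevel: str = "INFO", settings_override=None):
    settings = get_project_settings()
    settings.set("LOG_LEVEL", loglevel)
    for key, value in (settings_override or {}).items():
        settings.set(key, value)
    process = CrawlerProcess(settings)
    process.crawl(spider)  # the spider is looked up by name, e.g. "rightmove"
    process.start()  # blocks until the crawl finishes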
class LoadedConfiguration(DefaultConfiguration):
    """Configuration which is loaded through a config loader component."""

    config_file = injection.Dependency()

    def load_usercfg(self):
        if self.config_file:
            with open(self.config_file, "r") as fp:
                return _load(self.config_loaders, fp)
        else:
            return {}
class RightmoveSpider(base.WithInjection, scrapy.Spider):
    name: str = "rightmove"

    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        urls = self.settings.getlist("RIGHTMOVE_SEARCHES") or []
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response: Response) -> PR:
        model = _load_model(response)
        for page in _get_pages(response, model):
            yield scrapy.Request(page, callback=self.parse_page)
        yield from self.parse_propertymodel(response, model)

    def parse_page(self, response: Response) -> PR:
        model = _load_model(response)
        yield from self.parse_propertymodel(response, model)

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        properties = model["properties"]
        for data in properties:
            _make_id(data)
            existing = self.property_repository.get(data["id"])
            if existing and existing.banned:
                continue
            callback = functools.partial(self.parse_property, data, existing)
            yield response.follow(data["propertyUrl"], callback=callback)

    def parse_property(self, data, existing, response: Response) -> PR:
        model = _load_property_page_model(response)
        propertyData = model["propertyData"]
        data["bedrooms"] = propertyData["bedrooms"]
        data["displayAddress"] = propertyData["address"]["displayAddress"]
        data["propertyImages"] = [img["url"] for img in propertyData["images"]]
        data["floorplanImages"] = [img["url"] for img in propertyData["floorplans"]]
        data["keyFeatures"] = propertyData["keyFeatures"]
        data["lettingInformation"] = propertyData["lettings"]
        prop = to_prop(data, existing)
        yield PropertyItem({"prop": prop, "existing": existing})
class DefaultConfiguration(injection.Component):
    """Default configuration of crib."""

    config_loaders = injection.Dependency()

    def __init__(self, name, container):
        super().__init__(name, container)
        self._cfg = None

    def __getitem__(self, key):
        if self._cfg is None:
            self._cfg = self.load()
        return self._cfg.get(key, {})

    def load(self):
        default = _load_default(self.config_loaders)
        user_cfg = self.load_usercfg()
        cfg = _merge_config(default, user_cfg)
        return cfg

    def load_usercfg(self):
        return {}
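# _merge_config is referenced but not shown. A plausible implementation is a
# recursive dict merge in which user values override defaults; the project's
# actual helper may differ in the details.
from typing import Any, Dict


def _merge_config(default: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    merged = dict(default)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _merge_config(merged[key], value)  # merge nested sections
        else:
            merged[key] = value  # user value wins outright
    return merged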
class Flask(injection.Component, Quart):
    _crib_config = injection.Infrastructure("config")
    user_repository = injection.Dependency()
    property_repository = injection.Dependency()
    directions_service = injection.Dependency()
    directions_repository = injection.Dependency()
    property_service = injection.Dependency()
    auth_service = injection.Dependency()
    scrape_service = injection.Dependency()

    def __init__(self, *args, **kwargs):
        self._name = None
        super().__init__(*args, **kwargs)

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        self._name = value
class ZooplaSpider(base.WithInjection, scrapy.Spider):
    name: str = "zoopla"

    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        urls = self.settings.getlist("ZOOPLA_SEARCHES") or []
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response: Response) -> PR:
        model = json.loads(response.body)
        for page in _get_pages(response, model):
            yield scrapy.Request(page, callback=self.parse_page)
        yield from self.parse_propertymodel(response, model)

    def parse_page(self, response: Response) -> PR:
        model = json.loads(response.body)
        yield from self.parse_propertymodel(response, model)

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        properties = model["listing"]
        for data in properties:
            _make_id(data)
            existing = self.property_repository.get(data["id"])
            if existing and existing.banned:
                continue
            callback = functools.partial(self.parse_property, data, existing)
            yield response.follow(data["details_url"], callback=callback)

    def parse_property(self, data, existing, response: Response) -> PR:
        propd = {
            "bedrooms": int(data["num_bedrooms"]),
            "displayAddress": data["displayable_address"],
            "feesApply": False,
            "firstVisibleDate": _to_dt(data["first_published_date"]),
            "id": data["id"],
            "location": {
                "latitude": data["latitude"],
                "longitude": data["longitude"],
            },
            "price": {
                "amount": data["rental_prices"]["per_month"],
                "currencyCode": "GBP",
                "frequency": "monthly",
            },
            "propertyImages": self._get_property_images(response),
            "floorplanImages": self._get_floorplan_images(response),
            "propertySubType": data["property_type"],
            "propertyTypeFullDescription": "{} bedroom {}".format(
                data["num_bedrooms"], data["property_type"]
            ),
            "propertyUrl": data["details_url"],
            "students": False,
            "summary": data["description"],
            "transactionType": "rent",
            "keyFeatures": self._get_key_features(response),
            "lettingInformation": {
                "Updated": _to_dt(data["last_published_date"]),
                "Furnishing": data["furnished_state"],
            },
            "feesApplyText": data.get("letting_fees", ""),
            "favorite": existing.favorite if existing else False,
            "toWork": existing.toWork.asdict() if existing and existing.toWork else None,
        }
        prop = Property.fromdict(propd)
        yield PropertyItem({"prop": prop, "existing": existing})

    def _get_key_features(self, response: Response) -> List[str]:
        xpath = "//ul[contains(@class, 'dp-features-list--bullets')]/li/text()"
        return [s.strip() for s in response.xpath(xpath).extract()]

    def _get_property_images(self, response: Response) -> List[str]:
        data = json.loads(
            response.xpath("/html/body/script[4]/text()").extract_first()
        )
        photos = data["@graph"][3]["photo"]
        return [p["contentUrl"] for p in photos]

    def _get_floorplan_images(self, response: Response) -> List[str]:
        xpath = (
            "//div[@id = 'floorplan-1']"
            "//div[contains(@class, 'ui-modal-gallery__asset')]/@style"
        )
        style = response.xpath(xpath).extract_first()
        if style:
            match = re.match(r".*url\('(.*)'\).*", style)
            if match:
                return [match.group(1)]
        return []
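# The _to_dt helper used above is not shown. Assuming Zoopla timestamps arrive
# as "YYYY-MM-DD HH:MM:SS" strings, a minimal sketch would be:
import datetime


def _to_dt(value: str) -> datetime.datetime:
    return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")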
class DirectionsService(plugins.Plugin):
    directions_repository = injection.Dependency()

    @classmethod
    def config_schema(cls) -> Dict[str, Any]:
        return {
            "work-location": {
                "type": "dict",
                "required": True,
                "schema": {
                    "latitude": {"type": "float", "required": True},
                    "longitude": {"type": "float", "required": True},
                },
            },
            "search-area": {
                "type": "dict",
                "required": True,
                "schema": {
                    "northEast": {
                        "type": "dict",
                        "required": True,
                        "schema": {
                            "lat": {"type": "float", "required": True},
                            "lng": {"type": "float", "required": True},
                        },
                    },
                    "southWest": {
                        "type": "dict",
                        "required": True,
                        "schema": {
                            "lat": {"type": "float", "required": True},
                            "lng": {"type": "float", "required": True},
                        },
                    },
                    "latsamples": {"type": "float", "required": True},
                    "lngsamples": {"type": "float", "required": True},
                },
            },
        }

    @abc.abstractmethod
    async def to_work(self, origin: Location, mode: str) -> Dict:
        return {}

    async def fetch_map_to_work(self, mode: str) -> None:
        for i, ll in enumerate(self.raster_map()):
            log.info("Fetching #%s", i)
            route = await self.to_work(Location(**ll), mode)
            try:
                d = Direction.fromdict(route)
            except Exception as err:
                log.info("%s", err)
            else:
                self.directions_repository.insert(d)

    def raster_map(self) -> Iterable[Dict]:
        ne = self.config["search-area"]["northEast"]
        sw = self.config["search-area"]["southWest"]
        latsamples = self.config["search-area"]["latsamples"]
        lngsamples = self.config["search-area"]["lngsamples"]
        latdelta = ne["lat"] - sw["lat"]
        lngdelta = ne["lng"] - sw["lng"]
        for lat in frange(sw["lat"], ne["lat"], latdelta / latsamples):
            for lng in frange(sw["lng"], ne["lng"], lngdelta / lngsamples):
                yield {"latitude": lat, "longitude": lng}

    def to_work_durations(self, colormap: str, maxDuration: int) -> Iterable[Dict[str, Any]]:
        try:
            cmap = cmocean.cm.cmap_d[colormap]
        except KeyError:
            raise ValueError(f"Invalid color map {colormap}")
        getDuration = operator.itemgetter("durationValue")
        durations = [
            dur
            for dur in self.directions_repository.get_to_work_durations()
            if getDuration(dur) < maxDuration
        ]
        if not durations:
            return durations
        maxD = getDuration(max(durations, key=getDuration))
        minD = getDuration(min(durations, key=getDuration))
        colors = self._color_values(minD, maxD, cmap)
        offset = minD
        for d in durations:
            v = d["durationValue"]
            d["color"] = colors[v - offset]
        log.debug("Fetched %s durations", len(durations))
        return durations

    def colormaps(self) -> Iterable[str]:
        return list(cmocean.cm.cmap_d.keys())

    @staticmethod
    def _color_values(minV, maxV, colormap):
        # One colour per distinct duration value, inclusive of both endpoints.
        delta = maxV - minV
        colormap = colormap._resample(delta + 1)
        rgb_values = colormap(numpy.arange(delta + 1))[:, :-1]
        hex_values = [rgb2hex(rgb) for rgb in rgb_values]
        return hex_values

    def get_area(self, max_duration=43 * 60, alpha=None, hullbuffer=None):
        area = self.directions_repository.get_to_work_area(max_duration)
        if area:
            return area
        directions = [
            [d["location"][1], d["location"][0]]
            for d in self.directions_repository.get_to_work_durations()
            if d["durationValue"] <= max_duration
        ]
        area = map_analysis.get_area(directions, alpha, hullbuffer)
        self.directions_repository.insert_to_work_area(
            max_duration=max_duration, area=area
        )
        return area
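# raster_map relies on a frange helper that is not shown: a float-valued
# analogue of range(). A minimal sketch, assuming half-open semantics like
# range():
from typing import Iterator


def frange(start: float, stop: float, step: float) -> Iterator[float]:
    current = start
    while current < stop:
        yield current
        current += step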
class RightmoveSpider(base.WithInjection, scrapy.Spider):
    name: str = "rightmove"

    property_repository = injection.Dependency()

    def start_requests(self) -> Iterable[scrapy.Request]:
        urls = self.settings.getlist("RIGHTMOVE_SEARCHES") or []
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response: Response) -> PR:
        model = _load_model(response)
        for page in _get_pages(response, model):
            yield scrapy.Request(page, callback=self.parse_page)
        yield from self.parse_propertymodel(response, model)

    def parse_page(self, response: Response) -> PR:
        model = _load_model(response)
        yield from self.parse_propertymodel(response, model)

    def parse_propertymodel(self, response: Response, model: Dict) -> PR:
        properties = model["properties"]
        for data in properties:
            _make_id(data)
            existing = self.property_repository.get(data["id"])
            if existing and existing.banned:
                continue
            callback = functools.partial(self.parse_property, data, existing)
            yield response.follow(data["propertyUrl"], callback=callback)

    def parse_property(self, data, existing, response: Response) -> PR:
        data["propertyImages"] = self._get_property_images(response)
        data["floorplanImages"] = self._get_floorplan_images(response)
        data["lettingInformation"] = self._get_letting_information(response)
        data["keyFeatures"] = self._get_key_features(response)
        data["summary"] = self._get_summary(response)
        prop = to_prop(data, existing)
        yield PropertyItem({"prop": prop, "existing": existing})

    def _get_key_features(self, response: Response) -> List[str]:
        xpath = "//div[contains(@class,'key-features')]/ul/li/text()"
        return response.xpath(xpath).extract()

    def _get_letting_information(self, response: Response) -> Dict[str, str]:
        xpath = "//div[@id='lettingInformation']//td/text()"
        flat_info = response.xpath(xpath).extract()
        # Table cells alternate key, value; pair them up and strip ": " suffixes.
        tuples = zip(flat_info[::2], flat_info[1::2])
        return {k.rstrip(": "): v for k, v in tuples}

    def _get_summary(self, response: Response) -> str:
        xpath = "//div[@id='description']//div[@class='sect ']/node()"
        return "\n".join(response.xpath(xpath).extract()).strip()

    def _get_property_images(self, response: Response) -> List[str]:
        xpath = "//div[@class='gallery gallery-grid']/ul/*/a/img/@src"
        return response.xpath(xpath).extract()

    def _get_floorplan_images(self, response: Response) -> List[str]:
        xpath = "//div[contains(@class,'floorplancontent')]//img/@src"
        return list(set(response.xpath(xpath).extract()))
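# Both spiders hand off to a to_prop helper that is not shown. Judging from the
# inline logic in ZooplaSpider.parse_property, it is assumed to build a
# Property from the scraped data while carrying state over from any previously
# stored copy; the real helper may do more normalisation.
def to_prop(data, existing):
    data["favorite"] = existing.favorite if existing else False
    data["toWork"] = existing.toWork.asdict() if existing and existing.toWork else None
    return Property.fromdict(data)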