def ClusterHouses(matches, plot_groups=False):
    """Group houses into spatial clusters with MeanShift over their locations.

    Args:
        matches: Indexable collection of house records. Each record is passed
            to ``RFAPI.house_location`` (the first two components of the
            result are used as the clustering coordinates) and to
            ``RFAPI.house_address``.
        plot_groups: When true, the points and their predicted labels are
            handed to ``HouseScore._plot_groups``.

    Returns:
        A dict mapping each cluster label (as a string) to a list of
        ``{"adress": ..., "location": [x, y]}`` entries. If anything raises
        while clustering, the exception text is stored under ``"error"``,
        logged, and whatever has been collected so far is returned.
    """
    groups = {}
    try:
        # One coordinate row per house.
        X = np.zeros((len(matches), 2))
        for idx, house in enumerate(matches):
            loc = RFAPI.house_location(house)
            X[idx] = (loc[0], loc[1])

        # Alternatives noted by the original author and no longer instantiated
        # here: SpectralClustering (nearest_neighbors affinity) "a bit buggy",
        # GaussianMixture (5 components, full covariance) "best so far",
        # AffinityPropagation "yielded one cluster".
        bandwidth = cluster.estimate_bandwidth(X, quantile=.3)
        algorithm = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
        algorithm.fit(X)

        # The fallback is kept so an estimator without `labels_` can be
        # swapped in for `algorithm` without touching the rest of the code.
        if hasattr(algorithm, 'labels_'):
            # `np.int` was removed in NumPy 1.24; the builtin `int` is the
            # documented replacement and yields the same dtype.
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(X)

        for idx, house in enumerate(matches):
            # NOTE: the "adress" spelling is part of the returned schema and
            # is kept as-is for existing consumers.
            groups.setdefault(str(y_pred[idx]), []).append({
                "adress": RFAPI.house_address(house),
                "location": [X[idx][0], X[idx][1]]
            })
        logging.debug("groups = %s", groups)

        if plot_groups:
            HouseScore._plot_groups(X, y_pred)
    except Exception as e:
        # Best-effort boundary: report the failure in-band instead of raising.
        groups["error"] = str(e)
        logging.error(groups["error"])
    return groups
def distance(self, house, details):
    """Score a house by its average scaled distance to the favourite locations.

    The measure facts for ``"distance"`` supply the divisor applied to each
    raw distance, the cutoff the average is compared against, and the weight
    that converts the average into the money component of the result.

    Args:
        house: Mapping describing the house. Its ``"parcel"`` entry must carry
            ``"latitude"`` and ``"longitude"`` for a distance to be measured.
            When the coordinates are unusable, the house address is geocoded
            and, on success, ``house["parcel"]`` is overwritten in place.
        details: Unused here; accepted so all measures share one signature.

    Returns:
        A ``HouseScoreResult``. When there are no favourite locations or the
        house has no usable coordinates, the result is a rejection valued at
        the cutoff with the message "Can't measure distance". Otherwise it
        carries the weighted average, the average itself, whether the average
        is strictly below the cutoff, and an explanatory message on rejection.
    """
    _median, div, cutoff, weight = self._get_measure_facts("distance")
    fav_len = len(self.fav_locations)

    parcel = house.get("parcel")
    # Both coordinates are read below, so both must be present.
    if (fav_len == 0 or parcel is None
            or parcel.get("latitude") is None
            or parcel.get("longitude") is None):
        house_address = RFAPI.house_address(house)
        if house_address != "":
            loc = AddressToLocation(house_address)
            if loc is not None and len(loc) == 2:
                # The geocoded position is stored on the house, but this call
                # still reports a rejection.
                # NOTE(review): presumably this caches the position for a
                # later scoring pass; confirm against callers.
                house["parcel"] = {"latitude": loc[0], "longitude": loc[1]}
        return HouseScoreResult(cutoff * weight, cutoff, False,
                                "Can't measure distance")

    latitude = parcel["latitude"]
    longitude = parcel["longitude"]
    scaled_total = sum(
        (LocationDistance([latitude, longitude], fav["loc"]) / div
         for fav in self.fav_locations),
        0.0)
    distance_average = scaled_total / fav_len

    # The message follows the accepted flag, so a rejected result always
    # carries a reason (the two previously disagreed at exactly the cutoff).
    accepted = distance_average < cutoff
    if accepted:
        message = ""
    else:
        message = "distance {}(mil) is larger than cut of {}(mil)".format(
            distance_average, cutoff)
    return HouseScoreResult(distance_average * weight, distance_average,
                            accepted, message)
def get_house_score_message(m):
    """Build a one-line summary of a scored house.

    Args:
        m: Mapping for a scored house. It must contain ``"scores"`` and may
            contain ``"URL"``, which is appended to the address as a
            redfin.com link.

    Returns:
        A ``(passed, text)`` tuple. ``passed`` is ``False`` when the scores
        carry a ``"cutoff"`` entry (the text then names the cut fields) and
        ``True`` otherwise.
    """
    scores = m['scores']
    label = RFAPI.house_address(m)

    url = m.get("URL")
    if url is not None:
        label = "{} : http://www.redfin.com{}".format(label, url)

    cut_fields = scores.get("cutoff")
    if cut_fields is None:
        return True, "{} => {}".format(label, scores)
    return False, "{} => Cut for {{ {} }} score = {}".format(
        label, cut_fields, scores)
def get_scores(self, house, details=None):
    """Run every configured measure on a house and collect the results.

    Each name in ``self.default_fields`` (except ``"value"``) that resolves to
    an attribute on ``self`` is called with ``(house, details)``. Its result
    is stored as a plain dict under that name and its ``money`` is added to a
    running total. ``self.value`` is evaluated last because it receives that
    total. Names of measures whose result is not accepted are accumulated,
    comma-separated, under ``"cutoff"``.

    Args:
        house: Mapping describing the house; ``"beds"`` and ``"price"`` are
            read with subscript access and must be present.
        details: Optional extra data passed through to the measures and the
            ``RFAPI`` helpers. When ``None``, neighborhoods are reported as
            an empty list.

    Returns:
        Dict with one entry per evaluated measure, a ``"value"`` entry, a
        ``"facts"`` entry describing the house, and a ``"cutoff"`` entry only
        when at least one measure rejected the house.
    """
    scores = {}

    def _mark_cut(name):
        # Append `name` to the comma-separated list of rejecting measures.
        so_far = scores.get("cutoff")
        scores["cutoff"] = ",".join(part for part in (so_far, name) if part)

    running_total = 0.0
    for field in self.default_fields:
        if field == "value":
            continue
        scorer = getattr(self, field, None)
        if scorer is None:
            continue
        outcome = scorer(house, details)
        scores[field] = dict(outcome._asdict())
        running_total += outcome.money
        if not outcome.accepted:
            _mark_cut(field)

    # value evaluation must be run last
    value_outcome = self.value(house, details, running_total)
    scores["value"] = dict(value_outcome._asdict())
    if not value_outcome.accepted:
        _mark_cut("value")

    # Entries are filled in one by one, in the order they are reported.
    facts = {}
    facts["build"] = house.get("year_built", -1)
    facts["full_address"] = RFAPI.house_address(house)
    facts["beds"] = house["beds"]
    if house.get("sqft") is not None:
        facts["sqft"] = house["sqft"]
    else:
        facts["sqft"] = 0.0
    facts["baths"] = house.get("baths", 0)
    facts["price"] = house["price"]
    facts["County"] = RFAPI.house_county(house, details)
    facts["photo"] = RFAPI.house_photo_url(house, details)
    if details is not None:
        facts["neighborhoods"] = RFAPI.house_neighborhoods(house, details)
    else:
        facts["neighborhoods"] = []
    scores["facts"] = facts
    return scores