def geocode(self):
    """Update latitude, longitude, rating and ZIP in Locations table.

    For every row returned by ``self.get_rows_with_null_rating()``, build a
    New Orleans street address, pass it to ``self.gmaps.geocode`` and write
    the values produced by ``self.process_google_results`` to the
    ``Location`` row that has the same ``document_id``.

    Addresses with no geocoding results are logged and skipped. A failed
    update is logged and rolled back, and processing continues with the
    next row.
    """
    print('\nGeocoding...')

    null_rating_rows = self.get_rows_with_null_rating()

    for row in null_rating_rows:
        full_address = "{0} {1}, New Orleans, LA".format(
            row.street_number, row.address)

        result = self.gmaps.geocode(full_address)

        # Truthiness covers an empty result list and also a `None` response.
        if not result:
            log.info('No geocoding results for: %s', full_address)

            # TODO: Need to also note failure so future geocoding scripts
            # don't keep trying and failing on the same addresses.
            # Possibly update Location's `rating` and/or Cleaned's
            # `location_publish` fields.
            continue

        details = self.process_google_results(result)

        try:
            with SESSION.begin_nested():
                statement = (
                    update(Location)
                    .values(details)
                    .where(Location.document_id == row.document_id)
                )
                SESSION.execute(statement)
                SESSION.flush()
        except Exception as error:  # TODO: Handle specific errors.
            log.exception(error, exc_info=True)
            SESSION.rollback()

        # Commit once per row so that a rollback caused by a later failure
        # cannot discard updates that already succeeded.
        SESSION.commit()
def commit_to_database(self, table, output):
    """Insert `output` into the `db` model named by `table`.

    The INSERT is executed inside a nested transaction. Any exception is
    logged at debug level and followed by a session rollback. The session
    is committed afterwards in every case.
    """
    try:
        # TODO: Is this the correct method for this?
        with SESSION.begin_nested():
            model = getattr(db, table)
            statement = insert(model).values(output)
            SESSION.execute(statement)  # TODO: What is this?
            SESSION.flush()
    except Exception as exc:
        log.debug(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()  # TODO: Should this be here?
def check_geocoder_good_rating(self):
    """Mark locations whose geocoder rating counts as good as publishable.

    Sets ``location_publish`` to True for every ``Location`` whose
    ``rating`` is ``'RANGE_INTERPOLATED'`` or ``'ROOFTOP'``.

    The update call sits outside the ``try`` block, so an exception raised
    by it propagates to the caller; only the flush is guarded.
    """
    good_rating = (
        (Location.rating == 'RANGE_INTERPOLATED') |
        (Location.rating == 'ROOFTOP')
    )
    matching = SESSION.query(
        Location.rating,
        Location.location_publish
    ).filter(good_rating)
    matching.update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_geocoder_bad_rating(self):
    """Mark locations whose geocoder rating counts as bad as unpublishable.

    Sets ``location_publish`` to False for every ``Location`` whose
    ``rating`` is ``'GEOMETRIC_CENTER'``, ``'APPROXIMATE'`` or NULL.

    The update call sits outside the ``try`` block, so an exception raised
    by it propagates to the caller; only the flush is guarded.
    """
    bad_rating = (
        (Location.rating == 'GEOMETRIC_CENTER') |
        (Location.rating == 'APPROXIMATE') |
        (Location.rating.is_(None))
    )
    matching = SESSION.query(
        Location.rating,
        Location.location_publish
    ).filter(bad_rating)
    matching.update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def commit_rows(self, rows):
    """Insert each JOIN-ed row into the cleaned table.

    Every row is inserted inside its own nested transaction. A failing row
    is logged together with its index and rolled back, after which the
    remaining rows are still processed. The session is committed after
    each row.
    """
    total = len(rows)
    log.debug('Committing %d rows', total)

    for index, record in enumerate(rows):
        log.debug("Row %d", index)

        try:
            with SESSION.begin_nested():
                statement = insert(Cleaned).values(record)
                SESSION.execute(statement)
                SESSION.flush()
        except Exception as exc:
            log.debug('count: %s', index)
            log.exception(exc, exc_info=True)
            SESSION.rollback()

        SESSION.commit()

    log.debug('%d rows committed', total)
def check_west_of_new_orleans(self):
    """Unpublish locations geocoded west of the New Orleans border.

    Sets ``location_publish`` to False for every ``Location`` whose
    ``longitude`` is below -90.140388.
    """
    # Longitudes smaller than this value lie west of New Orleans.
    too_far_west = Location.longitude < -90.140388

    candidates = SESSION.query(
        Location.longitude,
        Location.location_publish
    ).filter(too_far_west)
    # Runs outside the try block: errors raised here reach the caller.
    candidates.update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_high_amount(self):
    """Unpublish sales whose amount is unreasonably high (>= $20,000,000).

    Sets ``detail_publish`` to False for every ``Detail`` whose ``amount``
    is at least 20000000.
    """
    # Sales this large are possible but rare, so they are held back.
    suspiciously_high = Detail.amount >= 20000000

    candidates = SESSION.query(
        Detail.amount,
        Detail.detail_publish
    ).filter(suspiciously_high)
    # Runs outside the try block: errors raised here reach the caller.
    candidates.update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_low_amount(self):
    """Unpublish sales whose amount is unreasonably low (<= $0).

    Sets ``detail_publish`` to False for every ``Detail`` whose ``amount``
    is zero or negative.
    """
    # These are uncertain, so all of them are held back to be safe.
    suspiciously_low = Detail.amount <= 0

    candidates = SESSION.query(
        Detail.amount,
        Detail.detail_publish
    ).filter(suspiciously_low)
    # Runs outside the try block: errors raised here reach the caller.
    candidates.update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_north_of_new_orleans(self):
    """Unpublish locations geocoded north of the New Orleans border.

    Sets ``location_publish`` to False for every ``Location`` whose
    ``latitude`` is above 30.181719.
    """
    # Latitudes greater than this value lie north of New Orleans.
    too_far_north = Location.latitude > 30.181719

    candidates = SESSION.query(
        Location.latitude,
        Location.location_publish
    ).filter(too_far_north)
    # Runs outside the try block: errors raised here reach the caller.
    candidates.update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def make_all_locations_publishable(self):
    """
    Flag every location as publishable.

    Sets ``location_publish`` to True on all ``Location`` rows. This is the
    optimistic starting point: assume publishable, then check for reasons
    not to publish.
    """
    every_location = SESSION.query(Location.location_publish)
    # Runs outside the try block: errors raised here reach the caller.
    every_location.update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as exc:
        log.exception(exc, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_if_no_date(self):
    """Mark sales that are missing a date as not publishable.

    Sets ``detail_publish`` to False for every ``Detail`` row whose
    ``document_date`` or ``document_recorded`` is NULL.

    The update call sits outside the ``try`` block, so an exception raised
    by it propagates to the caller; only the flush is guarded.
    """
    # `.is_(None)` makes SQLAlchemy emit `IS NULL`. A plain Python
    # `column is None` is evaluated immediately against the column object
    # and is always False, so the filter would never match any row.
    missing_date = (
        Detail.document_date.is_(None) |
        Detail.document_recorded.is_(None)
    )

    SESSION.query(
        Detail.document_date,
        Detail.document_recorded,
        Detail.detail_publish
    ).filter(
        missing_date
    ).update(
        {"detail_publish": False}
    )

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_relative_date(self):
    """Check if sale date is >6 months prior to the recorded date.

    Walks day by day from ``self.initial_date`` up to, but not including,
    ``self.until_date`` (both ``'%Y-%m-%d'`` strings). For sales recorded
    on each day, ``detail_publish`` is set to False when the document date
    is more than 180 days before that day, or later than the day before it.

    Each update runs in its own nested transaction. Failures are logged and
    rolled back, and the session is committed once per day processed.
    """
    # Convert date strings to date format
    new_initial_date = datetime.strptime(
        self.initial_date, '%Y-%m-%d').date()
    new_until_date = datetime.strptime(
        self.until_date, '%Y-%m-%d').date()

    current_date = new_initial_date

    # Evaluate "180 days ago" relative to each particular day.
    # `<` rather than `!=`: with a start date later than the end date the
    # two would never be equal and the loop would not terminate.
    while current_date < new_until_date:
        # Date-range boundaries for this day, as date strings
        old_date_string = (
            current_date - timedelta(days=180)).strftime('%Y-%m-%d')
        previous_date_string = (
            current_date - timedelta(days=1)).strftime('%Y-%m-%d')
        current_date_string = current_date.strftime('%Y-%m-%d')

        # For sales recorded on a given day, check if the document
        # date is unbelievable (too old or in the future).
        # NOTE(review): `> previous day` also matches a document dated the
        # same day it was recorded -- confirm that is intended.
        unbelievable_dates = (
            Detail.document_date < old_date_string,
            Detail.document_date > previous_date_string,
        )

        for date_filter in unbelievable_dates:
            try:
                with SESSION.begin_nested():
                    SESSION.query(
                        Detail.document_recorded,
                        Detail.document_date,
                        Detail.detail_publish
                    ).filter(
                        Detail.document_recorded == current_date_string
                    ).filter(
                        date_filter
                    ).update({"detail_publish": False})

                    SESSION.flush()
            except Exception as error:
                log.exception(error, exc_info=True)
                SESSION.rollback()

        SESSION.commit()

        current_date = current_date + timedelta(days=1)