def find_all_publishable_rows_fitting_criteria(self, data):
    """TODO."""
    # log.debug(data)

    query = SESSION.query(Cleaned).filter(
        Cleaned.detail_publish.is_(True)
    ).filter(
        (Cleaned.sellers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.buyers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.address.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.instrument_no.ilike('%%%s%%' % data['name_address']))
    ).filter(
        Cleaned.neighborhood.ilike('%%%s%%' % data['neighborhood'])
    ).filter(
        Cleaned.zip_code.ilike('%%%s%%' % data['zip_code'])
    ).filter(
        Cleaned.document_date >= '%s' % data['begin_date']
    ).filter(
        Cleaned.document_date <= '%s' % data['end_date']
    ).filter(
        Cleaned.amount >= '%s' % data['amount_low']
    ).filter(
        Cleaned.amount <= '%s' % data['amount_high']
    ).all()

    # log.debug(query)

    SESSION.close()

    return query

def query_with_map_boundaries(self, data):
    """TODO."""
    query = SESSION.query(Cleaned).filter(
        Cleaned.detail_publish.is_(True)
    ).filter(
        (Cleaned.sellers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.buyers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.address.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.instrument_no.ilike('%%%s%%' % data['name_address']))
    ).filter(
        Cleaned.neighborhood.ilike('%%%s%%' % data['neighborhood'])
    ).filter(
        Cleaned.zip_code.ilike('%%%s%%' % data['zip_code'])
    ).filter(
        Cleaned.document_date >= '%s' % data['begin_date']
    ).filter(
        Cleaned.document_date <= '%s' % data['end_date']
    ).filter(
        Cleaned.amount >= '%s' % data['amount_low']
    ).filter(
        Cleaned.amount <= '%s' % data['amount_high']
    ).filter(
        (Cleaned.latitude <= data['bounds'][0]) &
        (Cleaned.latitude >= data['bounds'][2]) &
        (Cleaned.longitude <= data['bounds'][1]) &
        (Cleaned.longitude >= data['bounds'][3])
    ).order_by(
        desc(Cleaned.document_date)
    ).offset(
        '%d' % int(data['page_offset'])  # Convert unicode to int.
    ).limit(
        '%d' % int(data['page_length'])
    ).all()

    SESSION.close()

    return query

def get_rows_with_null_rating(self):
    """
    Return query result for locations with rating IS NULL.

    :returns: SQLAlchemy query result.
    """
    query = SESSION.query(
        Location.rating,
        Location.document_id,
        Location.street_number,
        Location.address
    ).join(
        Detail
    ).filter(
        Location.rating.is_(None)
    ).filter(
        Detail.document_recorded >= '{}'.format(self.initial_date)
    ).filter(
        Detail.document_recorded <= '{}'.format(self.until_date)
    ).all()

    log.debug('Rows with rating is NULL: {}'.format(len(query)))

    SESSION.close()

    return query

def find_page_of_publishable_rows_fitting_criteria(self, data):
    """TODO."""
    # log.debug(data)

    query = SESSION.query(Cleaned).filter(
        Cleaned.detail_publish.is_(True)
    ).filter(
        (Cleaned.sellers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.buyers.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.address.ilike('%%%s%%' % data['name_address'])) |
        (Cleaned.instrument_no.ilike('%%%s%%' % data['name_address']))
    ).filter(
        Cleaned.neighborhood.ilike('%%%s%%' % data['neighborhood'])
    ).filter(
        Cleaned.zip_code.ilike('%%%s%%' % data['zip_code'])
    ).filter(
        Cleaned.document_date >= '%s' % data['begin_date']
    ).filter(
        Cleaned.document_date <= '%s' % data['end_date']
    ).filter(
        Cleaned.amount >= '%s' % data['amount_low']
    ).filter(
        Cleaned.amount <= '%s' % data['amount_high']
    ).order_by(
        desc(Cleaned.document_date)
    ).offset(
        '%d' % int(data['page_offset'])  # Convert unicode to int.
    ).limit(
        '%d' % int(data['page_length'])
    ).all()

    # log.debug(query)

    SESSION.close()

    return query

def get_existing_until_date(self):
    """TODO: Docstring."""
    query_until_date = SESSION.query(
        Detail.document_recorded
    ).order_by(
        Detail.document_recorded.desc()
    ).limit(1).all()

    # Check if any records at all
    if len(query_until_date) == 0:
        # Make it so initialized date range will start from beginning.
        until_date = OPENING_DATE - timedelta(days=1)
        # log.debug(until_date)
        # log.debug(type(until_date))
    else:
        log.debug(len(query_until_date))
        for row in query_until_date:  # TODO: will this fail w/o .one()?
            until_date = row.document_recorded
            # log.debug(until_date)
            # log.debug(type(until_date))

    # log.debug(until_date)

    SESSION.close()

    return until_date

def join_subqueries(self):
    """Run a JOIN on subqueries."""
    log.debug('join_subqueries')

    subq_vendees = self.get_vendees()
    subq_vendors = self.get_vendors()
    subq_location = self.get_locations()

    log.debug('query...')

    query = SESSION.query(
        Detail.document_id,
        Detail.amount,
        Detail.document_date,
        Detail.document_recorded,
        Detail.instrument_no,
        Detail.detail_publish,
        Detail.permanent_flag,
        subq_vendees.c.buyers,
        subq_vendors.c.sellers,
        subq_location.c.location_publish,
        subq_location.c.address,
        subq_location.c.location_info
        # TODO: Once SQLAlchemy supports WITHIN GROUP, uncomment these.
        # subq_location.c.zip_code,
        # subq_location.c.latitude,
        # subq_location.c.longitude,
        # subq_location.c.neighborhood
    ).join(
        subq_vendees
    ).join(
        subq_vendors
    ).join(
        subq_location
    ).filter(
        Detail.document_recorded >= '{}'.format(self.initial_date)
    ).filter(
        Detail.document_recorded <= '{}'.format(self.until_date)
    ).all()

    log.debug('len(query): %d', len(query))

    SESSION.close()

    return query

def get_locations(self):
    """Return SQL query of locations table for given date range."""
    log.debug('get_locations')

    subquery = SESSION.query(
        Location.document_id,
        func.bool_and(Location.location_publish).label('location_publish'),
        func.string_agg(
            cast(Location.street_number, Text) + ' ' +
            cast(Location.address, Text),
            '; '
        ).label('address'),
        func.string_agg(
            'Unit: ' + cast(Location.unit, Text) + ', ' +
            'Condo: ' + cast(Location.condo, Text) + ', ' +
            'Weeks: ' + cast(Location.weeks, Text) + ', ' +
            'Subdivision: ' + cast(Location.subdivision, Text) + ', ' +
            'District: ' + cast(Location.district, Text) + ', ' +
            'Square: ' + cast(Location.square, Text) + ', ' +
            'Lot: ' + cast(Location.lot, Text),
            '; '
        ).label('location_info')
        # TODO: Once SQLAlchemy supports this, add these fields this way.
        # 'mode() WITHIN GROUP (ORDER BY locations.zip_code) AS zip_code',
        # 'mode() WITHIN GROUP (ORDER BY locations.latitude) AS latitude',
        # 'mode() WITHIN GROUP (ORDER BY locations.longitude) AS longitude',
        # 'mode() WITHIN GROUP (ORDER BY locations.neighborhood)
        #     AS neighborhood'
    ).group_by(
        Location.document_id
    ).subquery()

    # log.debug(subquery)

    SESSION.close()

    return subquery

def get_sale(self, instrument_no):
    """TODO."""
    data = {}
    data['update_date'] = self.get_last_updated_date()

    query = SESSION.query(Cleaned).filter(
        Cleaned.instrument_no == '%s' % instrument_no
    ).filter(
        Cleaned.detail_publish.is_(True)  # Only publish trusted data
    ).all()

    for row in query:
        row.amount = get_num_with_curr_sign(row.amount)
        row.document_date = ymd_to_full_date(
            (row.document_date).strftime('%Y-%m-%d'), no_day=True)
        # address = row.address
        # location_info = row.location_info
        data['assessor_publish'] = row.assessor_publish

    # newrows = query

    features = self.build_features_json(query)

    jsdata = {
        "type": "FeatureCollection",
        "features": features
    }

    SESSION.close()

    if len(query) == 0:
        return None, None, None
    else:
        return data, jsdata, query

def delete_cleaned(self):
    """TODO."""
    SESSION.query(Cleaned).filter(
        Cleaned.document_recorded >= '{}'.format(self.initial_date)
    ).filter(
        Cleaned.document_recorded <= '{}'.format(self.until_date)
    ).delete()

    SESSION.commit()

def delete_details(self):
    """TODO."""
    SESSION.query(Detail).filter(
        Detail.document_recorded >= '{}'.format(self.initial_date)
    ).filter(
        Detail.document_recorded <= '{}'.format(self.until_date)
    ).delete()

    SESSION.commit()

def geocode(self):
    """Update latitude, longitude, rating and ZIP in Locations table."""
    print('\nGeocoding...')

    null_rating_rows = self.get_rows_with_null_rating()

    for row in null_rating_rows:
        full_address = "{0} {1}, New Orleans, LA".format(
            row.street_number, row.address)

        result = self.gmaps.geocode(full_address)

        if len(result) == 0:
            log.info('No geocoding results for: {}'.format(full_address))
            # TODO: Need to also note failure so future geocoding scripts
            # don't keep trying and failing on the same addresses.
            # Possibly update Location's `rating` and/or Cleaned's
            # `location_publish` fields.
            continue

        details = self.process_google_results(result)

        try:
            with SESSION.begin_nested():
                u = update(Location)
                u = u.values(details)
                u = u.where(Location.document_id == row.document_id)
                SESSION.execute(u)
                SESSION.flush()
        except Exception as error:  # TODO: Handle specific errors.
            log.exception(error, exc_info=True)
            SESSION.rollback()

    SESSION.commit()

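# NOTE: `process_google_results()` is called above but not shown in this
# module. The following is a minimal, hypothetical sketch of what it might
# look like, assuming the standard Google Geocoding API response format and
# the Location columns used elsewhere here (rating, latitude, longitude,
# zip_code). It is an illustration, not the project's actual implementation.
def process_google_results(self, result):
    """Return a dict of Location values from a Google geocoder response."""
    details = {}

    geometry = result[0]['geometry']

    # Google's location_type ('ROOFTOP', 'RANGE_INTERPOLATED',
    # 'GEOMETRIC_CENTER' or 'APPROXIMATE') is stored in the `rating` column
    # and checked later by check_geocoder_good_rating() and
    # check_geocoder_bad_rating().
    details['rating'] = geometry['location_type']
    details['latitude'] = geometry['location']['lat']
    details['longitude'] = geometry['location']['lng']

    # Pull the ZIP code out of the address components, if present.
    for component in result[0].get('address_components', []):
        if 'postal_code' in component['types']:
            details['zip_code'] = component['long_name']

    return details
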
def get_details(self):
    """Return SQL query of details table for given date range."""
    subquery = SESSION.query(Detail).filter(
        Detail.document_recorded >= '{}'.format(self.initial_date)
    ).filter(
        Detail.document_recorded <= '{}'.format(self.until_date)
    ).subquery()

    log.debug(subquery)

    SESSION.close()

    return subquery

def no_neighborhood_found(self):
    """If no neighborhood is found, update with "None" in nbhd field."""
    log.debug('no_neighborhood_found')

    SESSION.query(
        Location
    ).filter(
        Location.neighborhood.is_(None)
    ).update(
        {Location.neighborhood: "None"},
        synchronize_session='fetch'
    )

    SESSION.commit()

def latest_date_no_flag(self):
    """Finds the latest date_recorded without permanent_flag set."""
    query = SESSION.query(
        func.max(Detail.document_recorded).label('late_date')
    ).filter(
        Detail.permanent_flag.is_(None)
    ).all()

    for row in query:
        latest_none_date = row.late_date

    latest_none_datetime = datetime.combine(
        latest_none_date, datetime.min.time())

    SESSION.close()

    return latest_none_datetime

def get_neighborhoods(self):
    """TODO."""
    query = SESSION.query(Neighborhood.gnocdc_lab).all()

    neighborhoods = []

    for neighborhood in query:
        neighborhoods.append(
            (neighborhood.gnocdc_lab).title().replace('Mcd', 'McD'))

    neighborhoods.sort()

    SESSION.close()

    return neighborhoods

def get_vendees(self):
    """Return SQL query of vendees table for given date range."""
    log.debug('get_vendees')

    subquery = SESSION.query(
        Vendee.document_id,
        func.string_agg(
            cast(Vendee.vendee_firstname, Text) + " " +
            cast(Vendee.vendee_lastname, Text),
            ', '
        ).label('buyers')
    ).group_by(
        Vendee.document_id
    ).subquery()

    # log.debug(subquery)

    SESSION.close()

    return subquery

def earliest_date_no_flag(self):
    """Find the earliest date_recorded without permanent_flag set."""
    query = SESSION.query(
        func.min(Detail.document_recorded).label('early_date')
    ).filter(
        Detail.permanent_flag.is_(None)  # To satisfy PEP8
    ).all()

    for row in query:
        earliest_none_date = row.early_date

    earliest_none_datetime = datetime.combine(
        earliest_none_date, datetime.min.time())

    SESSION.close()

    return earliest_none_datetime

def query_search_term_limit_3(self, table, term):
    """
    Get the top three results for the autocomplete dropdown.

    :param table: The Cleaned field (column) to query.
    :type table: string
    :param term: The autocomplete term entered in the search box.
    :type term: string
    :returns: A SQLAlchemy query result with at most three matches.
    """
    query = SESSION.query(
        getattr(Cleaned, table)
    ).filter(
        getattr(Cleaned, table).ilike('%%{}%%'.format(term))
    ).distinct().limit(3).all()

    SESSION.close()

    return query

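# Example usage (hypothetical; the instance name and values below are
# illustrative only): a search view might call this once per searchable
# field to build autocomplete suggestions, e.g.
#
#     results = queries.query_search_term_limit_3('neighborhood', 'marig')
#     suggestions = [row[0] for row in results]
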
def commit_to_database(self, table, output):
    """Commit to database using nested transactions and exceptions."""
    try:
        # TODO: Is this the correct method for this?
        with SESSION.begin_nested():
            i = insert(getattr(db, table))
            vals = i.values(output)
            SESSION.execute(vals)
            # TODO: What is this?
            SESSION.flush()
    except Exception as error:
        log.debug(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()  # TODO: Should this be here?

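# Example usage (hypothetical; the table name and row values below are
# illustrative assumptions): `table` is the name of a model class on the
# `db` module and `output` is a dict, or list of dicts, keyed by that
# table's columns, e.g.
#
#     rows = [{'document_id': 'OPR123456789', 'amount': 250000}]
#     self.commit_to_database('Detail', rows)
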
def get_last_updated_date(self):
    """TODO."""
    query = SESSION.query(Cleaned).filter(
        Cleaned.detail_publish.is_(True)
    ).order_by(
        desc(Cleaned.document_recorded)
    ).limit(1).all()

    log.info(query)

    updated_date = ''

    for row in query:
        updated_date = ymd_to_full_date(
            (row.document_recorded).strftime('%Y-%m-%d'), no_day=True)

    log.info(updated_date)

    SESSION.close()

    return updated_date

def get_vendors(self):
    """Return SQL query of vendors table for given date range."""
    log.debug('get_vendors')

    subquery = SESSION.query(
        Vendor.document_id,
        func.string_agg(
            cast(Vendor.vendor_firstname, Text) + " " +
            cast(Vendor.vendor_lastname, Text),
            ', '
        ).label('sellers')
    ).group_by(
        Vendor.document_id
    ).subquery()

    # log.debug(subquery)

    SESSION.close()

    return subquery

def check_geocoder_good_rating(self):
    """Check if geocoder rating is good (ROOFTOP or RANGE_INTERPOLATED)."""
    SESSION.query(
        Location.rating,
        Location.location_publish
    ).filter(
        (Location.rating == 'RANGE_INTERPOLATED') |
        (Location.rating == 'ROOFTOP')
    ).update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def earliest_date_temp_flag(self):
    """Find earliest date with permanent_flag = False."""
    query = SESSION.query(
        func.min(Detail.document_recorded).label('early_date')
    ).filter(
        Detail.permanent_flag.is_(False)  # To satisfy PEP8
    ).all()

    for row in query:
        earliest_temp_date = row.early_date

    if earliest_temp_date is not None:
        earliest_temp_datetime = datetime.combine(
            earliest_temp_date, datetime.min.time())

        log.debug(earliest_temp_datetime)

        SESSION.close()

        return earliest_temp_datetime
    else:
        SESSION.close()

        return None

def check_geocoder_bad_rating(self):
    """Check if geocoder rating is bad (imprecise or missing)."""
    SESSION.query(
        Location.rating,
        Location.location_publish
    ).filter(
        (Location.rating == 'GEOMETRIC_CENTER') |
        (Location.rating == 'APPROXIMATE') |
        (Location.rating.is_(None))
    ).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def neighborhood_found(self):
    """Use PostGIS to find which neighborhood a long/lat pair is in."""
    log.debug('neighborhood_found')

    SESSION.query(
        Location
    ).filter(
        func.ST_Contains(
            Neighborhood.geom,
            func.ST_SetSRID(
                func.ST_Point(
                    cast(Location.longitude, Float),
                    cast(Location.latitude, Float)
                ),
                4326
            )
        )
    ).update(
        {Location.neighborhood: Neighborhood.gnocdc_lab},
        synchronize_session='fetch'
    )

    SESSION.commit()

def commit_rows(self, rows):
    """Commit JOIN-ed rows to the cleaned table."""
    log.debug('Committing %d rows', len(rows))

    for count, row in enumerate(rows):
        log.debug("Row %d", count)
        try:
            with SESSION.begin_nested():
                i = insert(Cleaned)
                i = i.values(row)
                SESSION.execute(i)
                SESSION.flush()
        except Exception as error:
            log.debug('count: %s', count)
            log.exception(error, exc_info=True)
            SESSION.rollback()

    SESSION.commit()

    log.debug('%d rows committed', len(rows))

def check_north_of_new_orleans(self):
    """Check if geocoded coords are within north border of New Orleans."""
    # Latitude greater than 30.181719 is north of New Orleans:
    SESSION.query(
        Location.latitude,
        Location.location_publish
    ).filter(
        Location.latitude > 30.181719
    ).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def check_west_of_new_orleans(self):
    """Check if geocoded coords are within west border of New Orleans."""
    # Longitude less than -90.140388 is west of New Orleans:
    SESSION.query(
        Location.longitude,
        Location.location_publish
    ).filter(
        Location.longitude < -90.140388
    ).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def check_low_amount(self):
    """Check if sale amount is unreasonably low (<= $0)."""
    # Not sure about these, so check them all for now to be safe.
    SESSION.query(
        Detail.amount,
        Detail.detail_publish
    ).filter(
        Detail.amount <= 0
    ).update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def check_high_amount(self):
    """Check if sale amount is unreasonably high (>= $20,000,000)."""
    # Anything over $20,000,000 wouldn't be impossible, but is rare.
    SESSION.query(
        Detail.amount,
        Detail.detail_publish
    ).filter(
        Detail.amount >= 20000000
    ).update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def check_if_no_date(self):
    """Check if sale has a date."""
    SESSION.query(
        Detail.document_date,
        Detail.document_recorded,
        Detail.detail_publish
    ).filter(
        # Use .is_(None) so the NULL check happens in SQL; `is None` would
        # evaluate to False in Python and match nothing.
        (Detail.document_date.is_(None)) |
        (Detail.document_recorded.is_(None))
    ).update(
        {"detail_publish": False}
    )

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def make_all_locations_publishable(self):
    """
    Assume all sales are publishable. Set location_publish = 1.

    Then set to 0 if questionable data is found.
    """
    # Assume publishable, then check for reasons not to publish.
    SESSION.query(
        Location.location_publish
    ).update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()

def check_relative_date(self):
    """Check if sale date is > 6 months prior to the recorded date."""
    # Convert date strings to date objects.
    new_initial_date = datetime.strptime(
        self.initial_date, '%Y-%m-%d').date()
    new_until_date = datetime.strptime(
        self.until_date, '%Y-%m-%d').date()

    current_date = new_initial_date

    # Evaluate "six months ago" relative to each recorded date.
    while current_date != new_until_date:
        # Update date range.
        old_date = current_date - timedelta(days=180)
        previous_date = current_date - timedelta(days=1)

        # Convert date objects back to date strings.
        old_date_string = old_date.strftime('%Y-%m-%d')
        previous_date_string = previous_date.strftime('%Y-%m-%d')
        current_date_string = current_date.strftime('%Y-%m-%d')

        # For sales recorded on a given day, check if the document date is
        # unbelievable (too old or in the future).
        try:
            with SESSION.begin_nested():
                SESSION.query(
                    Detail.document_recorded,
                    Detail.document_date,
                    Detail.detail_publish
                ).filter(
                    Detail.document_recorded == current_date_string
                ).filter(
                    Detail.document_date < old_date_string
                ).update({"detail_publish": False})

                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        try:
            with SESSION.begin_nested():
                SESSION.query(
                    Detail.document_recorded,
                    Detail.document_date,
                    Detail.detail_publish
                ).filter(
                    Detail.document_recorded == current_date_string
                ).filter(
                    Detail.document_date > previous_date_string
                ).update({"detail_publish": False})

                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()

        current_date = current_date + timedelta(days=1)

def update_this_dates_permanent_flag(self,
                                     current_datetime,
                                     early_permanent_datetime,
                                     late_permanent_datetime):
    """TODO."""
    # The flag is True when this date falls within the permanent window.
    permanent = (early_permanent_datetime <= current_datetime <=
                 late_permanent_datetime)

    SESSION.query(
        Detail
    ).filter(
        Detail.document_recorded == '%s' % current_datetime
    ).update({"permanent_flag": permanent})

    SESSION.query(
        Cleaned
    ).filter(
        Cleaned.document_recorded == '%s' % current_datetime
    ).update({"permanent_flag": permanent})

    SESSION.commit()