from housinginsights.sources.mar import MarApiConn from housinginsights.tools.logger import HILogger logger = HILogger(name=__file__, logfile="sources.log") def quick_address_cleanup(addr): #Used to perform common string replacements in addresses. #The key is original, value is what we want it to become #values match format typically found in the mar table #Allows first-pass matching of address strings to the MAR # (failed matches are then bumped up to more robust methods) address_string_mapping = { "Northeast": "NE", "Northwest": "NW", "Southeast": "SE", "Southwest": "SW", " St ": " Street ", " St. ": " Street ", " Pl ": " Place ", " Pl. ": " Place ", " Ave ": " Avenue ", " Ave. ": " Avenue ", "N.E.": "NE", "N.W.": "NW", "S.E.": "SE", "S.W.": "SW" } # Format addr by matching the conventions of the MAR for key, value in address_string_mapping.items():
""" import sys import os import importlib from datetime import datetime import argparse python_filepath = os.path.abspath( os.path.join(os.path.dirname(__file__), os.pardir)) sys.path.append(python_filepath) # Configure logging import logging from housinginsights.tools.logger import HILogger logger = HILogger(name=__file__, logfile="sources.log", level=logging.INFO) #TODO is this import necessary? from housinginsights.config.base import HousingInsightsConfig from housinginsights.ingestion.Manifest import Manifest def get_multiple_api_sources(a): ''' This method calls the 'get_data' method on each ApiConn class in the /sources folder a = an arguments object from argparse a.ids: list of unique data ids. Passing 'None' to unique_data_ids will run all get_data methods. a.sample: when possible, download just a few lines (for faster testing) a.database: the database choice, such as 'docker_database', as identified in the secrets.json. a.debug: if True exceptions will be raised. if False, they will be printed but processing will continue.
"""
Tiny standalone script that exercises the HILogger setup: it appends the
project root to sys.path, builds an ingestion logger, and emits a warning
once per second for four seconds.
"""
import time
import sys
import os
import argparse

# Make the project package importable when this script is run directly.
PYTHON_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(PYTHON_PATH)

from housinginsights.tools.logger import HILogger
logger = HILogger(name=__file__, logfile="ingestion.log")

# The loop only needs a repetition count; the original unpacked
# enumerate(range(4)) into two variables (idx, value) that were never used.
for _ in range(4):
    logger.warning("Hi!")
    time.sleep(1)
""" import csv import os from uuid import uuid4 from housinginsights.config.base import HousingInsightsConfig from housinginsights.sources.models.pres_cat import PROJ_FIELDS, \ SUBSIDY_FIELDS, PROJ_ADDRE_FIELDS from housinginsights.tools import dbtools from housinginsights.sources.base import BaseApiConn from housinginsights.tools import misc as misctools from housinginsights.tools.logger import HILogger logger = HILogger(name=__file__, logfile="proj_sources.log", level=10) class ProjectBaseApiConn(BaseApiConn): """ Adds additional methods needed for anything that deals with the project table, e.g. the DCHousingApiConn and the DhcdApiConn. Provides methods for splitting downloaded data into necessary 'projects' and 'subsidy' files. Separated from the base class to avoid circular inheritance with the MarApiConn that we use for entity resolution. TODO could also make this a cleaning step instead of a download data step? But this would require refactor due to creation of two files from one, while ingestion process is set up with the
from datetime import datetime import dateutil.parser as dateparser import os from uuid import uuid4 from housinginsights.ingestion.DataReader import HIReader from housinginsights.sources.mar import MarApiConn from housinginsights.sources.models.pres_cat import CLUSTER_DESC_MAP from housinginsights.sources.google_maps import GoogleMapsApiConn from housinginsights.sources.models.mar import MAR_TO_TABLE_FIELDS PYTHON_PATH = os.path.abspath( os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) from housinginsights.tools.logger import HILogger logger = HILogger(name=__file__, logfile="ingestion.log") """ Usage: Dynamically import based on name of class in meta.json: http://stackoverflow.com/questions/4821104/python-dynamic-instantiation-from-string-name-of-a-class-in-dynamically-imported """ class CleanerBase(object, metaclass=ABCMeta): def __init__(self, meta, manifest_row, cleaned_csv='', removed_csv='', engine=None): self.cleaned_csv = cleaned_csv
import os
import sys

# Make the project package importable before the housinginsights imports.
python_filepath = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(python_filepath)

import argparse

from housinginsights.tools.logger import HILogger

import get_api_data
import load_data
from housinginsights.tools.mailer import HIMailer

# One logger per downstream logfile so a service run is captured in all
# three logs; the first (services.log) serves as this module's default.
loggers = [
    HILogger(name=__file__, logfile="services.log", level=10),
    HILogger(name=__file__, logfile="sources.log", level=10),
    HILogger(name=__file__, logfile="ingestion.log", level=10)
]
logger = loggers[0]


def run_get_api_data(debug=False):
    """Run get_api_data against the docker database as a service step.

    Any exception is logged; when ``debug`` is True it is re-raised so the
    caller fails loudly. The log file is mailed to the admin regardless of
    success or failure (``finally``).

    :param debug: if True, re-raise failures instead of only logging them.
    """
    # TODO Figure out which parameters should be passed for this to run
    # as a service.
    try:
        get_api_data.get_multiple_api_sources(db='docker_database')
    except Exception as e:
        logger.error("get_api_data failed with error: %s", e)
        if debug:
            # Bare `raise` re-raises with the original traceback intact;
            # the original `raise e` restarted the traceback from here.
            raise
    finally:
        get_api_data.send_log_file_to_admin(debug=debug)