Example #1
from housinginsights.sources.mar import MarApiConn

from housinginsights.tools.logger import HILogger
logger = HILogger(name=__file__, logfile="sources.log")


def quick_address_cleanup(addr):
    """
    Perform common string replacements in an address so that it matches the
    format typically found in the MAR table. In the mapping below, each key
    is the original text and each value is what we want it to become. This
    allows a first-pass match of address strings against the MAR; failed
    matches are then bumped up to more robust methods.
    """
    address_string_mapping = {
        "Northeast": "NE",
        "Northwest": "NW",
        "Southeast": "SE",
        "Southwest": "SW",
        " St ": " Street ",
        " St. ": " Street ",
        " Pl ": " Place ",
        " Pl. ": " Place ",
        " Ave ": " Avenue ",
        " Ave. ": " Avenue ",
        "N.E.": "NE",
        "N.W.": "NW",
        "S.E.": "SE",
        "S.W.": "SW"
    }

    # Format addr by matching the conventions of the MAR
    for key, value in address_string_mapping.items():
        addr = addr.replace(key, value)

    return addr
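

# Quick usage sketch (not part of the original module); the sample address
# below is hypothetical. "Northwest" becomes "NW" and " Ave. " becomes
# " Avenue ", so this call should return "1600 Pennsylvania Avenue NW".
cleaned_addr = quick_address_cleanup("1600 Pennsylvania Ave. Northwest")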
Example #2
"""

import sys
import os
import importlib
from datetime import datetime
import argparse

python_filepath = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(python_filepath)

# Configure logging
import logging
from housinginsights.tools.logger import HILogger
logger = HILogger(name=__file__, logfile="sources.log", level=logging.INFO)

#TODO is this import necessary?
from housinginsights.config.base import HousingInsightsConfig
from housinginsights.ingestion.Manifest import Manifest


def get_multiple_api_sources(a):
    '''
    This method calls the 'get_data' method on each ApiConn class in the
    /sources folder.

    a: an arguments object from argparse, with the following attributes:

    a.ids: list of unique data ids. If None, all get_data methods will be run.
    a.sample: when possible, download just a few lines (for faster testing).
    a.database: the database choice, such as 'docker_database', as identified
        in secrets.json.
    a.debug: if True, exceptions will be raised; if False, they will be
        printed but processing will continue.
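
    Example of constructing such a namespace by hand (illustrative only;
    the attribute values shown here are assumptions):

        example_args = argparse.Namespace(
            ids=None, sample=True, database='docker_database', debug=False)
        get_multiple_api_sources(example_args)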
Example #3
import time
import sys
import os
import argparse

PYTHON_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(PYTHON_PATH)

from housinginsights.tools.logger import HILogger

logger = HILogger(name=__file__, logfile="ingestion.log")

# Emit a few test log messages to ingestion.log, one per second.
for _ in range(4):
    logger.warning("Hi!")
    time.sleep(1)
Example #4
"""

import csv
import os
from uuid import uuid4

from housinginsights.config.base import HousingInsightsConfig
from housinginsights.sources.models.pres_cat import PROJ_FIELDS, \
    SUBSIDY_FIELDS, PROJ_ADDRE_FIELDS
from housinginsights.tools import dbtools

from housinginsights.sources.base import BaseApiConn
from housinginsights.tools import misc as misctools
from housinginsights.tools.logger import HILogger

logger = HILogger(name=__file__, logfile="proj_sources.log", level=10)


class ProjectBaseApiConn(BaseApiConn):
    """
    Adds additional methods needed for anything that deals with the project
    table, e.g. the DCHousingApiConn and the DhcdApiConn. Provides
    methods for splitting downloaded data into necessary 'projects'
    and 'subsidy' files. 

    Separated from the base class to avoid circular inheritance with 
    the MarApiConn that we use for entity resolution. 

    TODO could also make this a cleaning step instead of a download
    data step? But this would require refactor due to creation of
    two files from one, while ingestion process is set up with the 
Example #5
from abc import ABCMeta
from datetime import datetime
import dateutil.parser as dateparser
import os
from uuid import uuid4

from housinginsights.ingestion.DataReader import HIReader
from housinginsights.sources.mar import MarApiConn
from housinginsights.sources.models.pres_cat import CLUSTER_DESC_MAP
from housinginsights.sources.google_maps import GoogleMapsApiConn
from housinginsights.sources.models.mar import MAR_TO_TABLE_FIELDS

PYTHON_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))

from housinginsights.tools.logger import HILogger
logger = HILogger(name=__file__, logfile="ingestion.log")
"""
Usage:
Dynamically import based on name of class in meta.json:
http://stackoverflow.com/questions/4821104/python-dynamic-instantiation-from-string-name-of-a-class-in-dynamically-imported
"""


class CleanerBase(object, metaclass=ABCMeta):
    def __init__(self,
                 meta,
                 manifest_row,
                 cleaned_csv='',
                 removed_csv='',
                 engine=None):
        self.cleaned_csv = cleaned_csv
Example #6
import os
import sys
python_filepath = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir))
sys.path.append(python_filepath)

import argparse

from housinginsights.tools.logger import HILogger
import get_api_data
import load_data
from housinginsights.tools.mailer import HIMailer

loggers = [
    HILogger(name=__file__, logfile="services.log", level=10),
    HILogger(name=__file__, logfile="sources.log", level=10),
    HILogger(name=__file__, logfile="ingestion.log", level=10)
]
logger = loggers[0]


def run_get_api_data(debug=False):
    # TODO Figure out which parameters should be passed for this to run as a service.
    try:
        get_api_data.get_multiple_api_sources(db='docker_database')
    except Exception as e:
        logger.error("get_api_data failed with error: %s", e)
        if debug:
            raise e
    finally:
        get_api_data.send_log_file_to_admin(debug=debug)
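

# Hypothetical entry point (sketch only): the original script's command-line
# flags are not shown above, so the argument names here are assumptions.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Run Housing Insights data services.")
    parser.add_argument('--debug', action='store_true',
                        help="Re-raise exceptions instead of only logging them.")
    args = parser.parse_args()
    run_get_api_data(debug=args.debug)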