Example #1
from pymongo import MongoClient
from lib_cinci.config import main, load
from lib_cinci.folders import (path_to_predictions, path_to_pickled_models,
                               path_to_pickled_scalers, path_to_pickled_imputers)
import os
import logging
import logging.config

'''
    Using the --pickle option in model.py dumps the
    model, scaler and imputer objects. Use this script to keep
    only the top_n models from each experiment and delete the rest.
'''

# Logger config
logging.config.dictConfig(load('logger_config.yaml'))
logger = logging.getLogger()

# Directories to check for files
directories = [path_to_predictions,
               path_to_pickled_models,
               path_to_pickled_scalers,
               path_to_pickled_imputers]

# DB connection
client = MongoClient(main['logger']['uri'])
db = client['models']
collection = db['cincinnati']

# Top n models to keep from each experiment
n = 20
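
# A minimal sketch of the pruning step this setup leads into. The document
# fields ('experiment_name', 'prec_at_1') and the convention that pickled
# files are named after the model id are assumptions, not confirmed by this
# snippet.
for exp_name in collection.distinct('experiment_name'):
    top = collection.find({'experiment_name': exp_name}).sort('prec_at_1', -1).limit(n)
    keep = set(str(doc['_id']) for doc in top)
    for directory in directories:
        for filename in os.listdir(directory):
            # Delete any pickled file whose model id is not in the top n
            if os.path.splitext(filename)[0] not in keep:
                os.remove(os.path.join(directory, filename))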
Example #2
import pandas as pd
from sqlalchemy import create_engine
import os
import yaml
from lib_cinci.config import load
import sys

folder = os.environ['ROOT_FOLDER']
output_folder = os.environ['OUTPUT_FOLDER']

path_to_output = os.path.join(output_folder, 'feature_crosstabs.csv')

connparams = load('config.yaml')['db']
uri = '{dialect}://{user}:{password}@{host}:{port}/{database}'.format(**connparams)
engine = create_engine(uri)

validation_schema = sys.argv[1]

# get all tables from the features schema, excluding
# 'insp2' tables (they are lookups, not features) and
# parc_year, parcels_inspections and named_entities
query = '''
        SELECT DISTINCT table_name 
        FROM information_schema.tables
        WHERE table_schema = '{schema}'
        AND SUBSTRING(table_name FROM 1 FOR 5) != 'insp2'
        AND table_name NOT IN ('parc_year', 'parcels_inspections', 
                               'named_entities');
        '''.format(schema=validation_schema)

all_tables = pd.read_sql(query, engine)
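
# A sketch of the step this query feeds, assuming each feature table can be
# loaded whole and stacked into one CSV; the per-table query and the
# 'table_name' marker column are illustrative assumptions, not shown in this
# excerpt.
frames = []
for table in all_tables.table_name:
    df = pd.read_sql('SELECT * FROM {schema}.{table}'.format(
        schema=validation_schema, table=table), engine)
    df['table_name'] = table
    frames.append(df)
pd.concat(frames).to_csv(path_to_output, index=False)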
Example #3
#!/usr/bin/env python
import logging
import logging.config
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine
import datetime
from lib_cinci.config import load

logging.config.dictConfig(load('logger_config.yaml'))
logger = logging.getLogger()

years = [
    '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015'
]

tax_dfs = {
    '2007': 'taxes07',
    '2008': 'taxes08',
    '2009': 'taxes09',
    '2010': 'taxes10',
    '2011': 'taxes11',
    '2012': 'taxes12',
    '2013': 'taxes13',
    '2014': 'taxes14',
    '2015': 'taxes15'
}
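
# A sketch of the intended use of this mapping, assuming a SQLAlchemy engine
# built as in the other snippets (not created in this excerpt):
# tax_data = {year: pd.read_sql_table(table, engine)
#             for year, table in tax_dfs.items()}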


def format_parcels_list(parcels):
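    # Body truncated in the original listing; a plausible sketch, assuming the
    # helper quotes parcel ids for interpolation into a SQL IN (...) clause:
    return ', '.join("'{}'".format(p) for p in parcels)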
Example #4
File: fire.py  Project: dssg/cincinnati
import logging
import logging.config
from feature_utils import make_inspections_address_nmonths_table, compute_frequency_features
from feature_utils import format_column_names, group_and_count_from_db
from lib_cinci.config import load
from lib_cinci.features import check_date_boundaries

# Configure logger
logging.config.dictConfig(load("logger_config.yaml"))
logger = logging.getLogger()


def make_fire_features(con, n_months, max_dist):
    """
    Make fire features.

    Input:
    con: connection to the postgres database.
         "set schema ..." must have been called on this connection
         to select the correct schema from which to load inspections.
    n_months: size (in months) of the time window used to aggregate
              fire incidents for each inspection.
    max_dist: maximum distance from the inspected address within which
              fire incidents are counted.

    Output:
    A pandas dataframe with one row per inspection and one column per feature.
    """
    dataset = "fire"
    date_column = "incident_date"

    # Get the time window for which you can generate features
    min_insp, max_insp = check_date_boundaries(con, n_months, dataset, date_column)

    make_inspections_address_nmonths_table(