def main():
    """Load station CSV files for every data source except 'indego'.

    For each configured data source, every ``*.csv`` file found in
    ``<data_directory>/<source>/stations`` is read row by row. Each row has
    its field names normalised with ``fix_mappings`` and, when it carries a
    ``short_name``, is completed (capacity fallback, system name/id,
    defaults) and handed to ``insert_station_dimensions``.
    """
    cfg = settings.get_config()
    data_dir = os.path.join(cfg['ingestion_settings']['data_directory'])

    for source in cfg['datasources']:
        # Compare by value: the original `is not 'indego'` tested object
        # identity, which is not guaranteed to match equal strings and
        # triggers a SyntaxWarning on Python 3.8+.
        if source == 'indego':
            continue

        stations_dir = os.path.join(data_dir, source, 'stations')
        if not os.path.isdir(stations_dir):
            continue

        mappings = cfg['datasources'][source]['station_fields']
        for filename in os.listdir(stations_dir):
            if not filename.endswith('.csv'):
                continue

            print('Processing ' + source + ' ' + filename)
            with open(os.path.join(stations_dir, filename), 'r') as fh:
                for row in CSVSource(fh):
                    fix_mappings(row, mappings)
                    # NOTE(review): nesting below was reconstructed from a
                    # whitespace-collapsed source; confirm that rows without
                    # a short_name are meant to be skipped entirely.
                    if 'short_name' not in row:
                        continue
                    if 'capacity' not in row:
                        # -1 marks a capacity that the file did not provide.
                        row['capacity'] = -1
                    row['system_name'] = source
                    row['system_id'] = dw.system_dimension.ensure(row)
                    setdefaults(row, DEFAULTS)
                    insert_station_dimensions(row)
def main():
    """Push 'indego' station names from the station CSV files to the database.

    Every ``*.csv`` file in ``<data_directory>/indego/stations`` is read, its
    field names are normalised with ``fix_mappings``, and the
    ``(name, short_name)`` pair of each row is collected. The collected pairs
    are then passed to ``QUERY_UPDATE_START_STATIONS`` and
    ``QUERY_UPDATE_END_STATIONS`` via ``executemany``.

    The database connection is always closed, even when reading a file or
    executing a query raises.
    """
    cfg = settings.get_config()
    db_conn = util.get_database_connection()
    try:
        data_dir = os.path.join(cfg['ingestion_settings']['data_directory'])
        source = 'indego'
        stations_dir = os.path.join(data_dir, source, 'stations')

        if os.path.isdir(stations_dir):
            # Looked up once and reused for every row (the original computed
            # this value but repeated the config lookup per row instead).
            mappings = cfg['datasources'][source]['station_fields']
            names = []
            for filename in os.listdir(stations_dir):
                if not filename.endswith('.csv'):
                    continue
                print('Processing ' + source + ' ' + filename)
                with open(os.path.join(stations_dir, filename), 'r') as fh:
                    for row in csv.DictReader(fh):
                        fix_mappings(row, mappings)
                        names.append((row['name'], row['short_name']))

            # NOTE(review): placement of the updates (after all files, inside
            # the directory check) was reconstructed from a
            # whitespace-collapsed source; confirm against the original.
            db_conn.executemany(QUERY_UPDATE_START_STATIONS, names)
            db_conn.executemany(QUERY_UPDATE_END_STATIONS, names)

        db_conn.commit()
    finally:
        # Release the connection on both the success and the error path.
        db_conn.close()
#!/usr/bin/env python """ Ingest trip data into the staging database. """ import csv import logging import os import time from dateutil import parser from pygrametl.datasources import CSVSource import pygrametl from model.DW import DW from settings import settings cfg = settings.get_config() dw = DW() DEFAULTS = [ ('customer_gender', 'unspecified'), ('customer_birthyear', -1), ('customer_type', 'unspecified'), ('trip_category', 'Round Trip'), ('start_station_name', 'unspecified'), ('start_station_latitude', None), ('start_station_longitude', None), ('start_station_capacity', None), ('end_station_name', 'unspecified'), ('end_station_latitude', None), ('end_station_longitude', None), ('end_station_capacity', None) ] logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)-8s %(message)s') logger = logging.getLogger() ch = logging.StreamHandler()
def get_database_connection():
    """Open a SQLite connection to the configured staging database.

    The database path is read from
    ``ingestion_settings.staging_db_location`` in the application config.

    Returns:
        The object returned by ``sqlite3.connect`` for that path.
    """
    ingestion_settings = settings.get_config()['ingestion_settings']
    db_location = ingestion_settings['staging_db_location']
    return sqlite3.connect(db_location)
from googlemaps import Client
from googlemaps.elevation import elevation

from settings import settings
from util import util

# Google API key, read from the `google_api.api_key` config entry at import time.
API_KEY = settings.get_config()['google_api']['api_key']

# Selects id, latitude and longitude of up to 500 start stations that have no
# elevation yet, skipping rows whose latitude or longitude is '#N/A' or empty.
GET_START_STATIONS_QUERY = """ SELECT start_station_id, start_station_latitude, start_station_longitude FROM start_station WHERE start_station_latitude NOT IN('#N/A', '') AND start_station_longitude NOT IN ('#N/A', '') AND start_station_elevation IS NULL LIMIT 500 """

# Same selection as above, for the end_station table.
GET_END_STATIONS_QUERY = """ SELECT end_station_id, end_station_latitude, end_station_longitude FROM end_station WHERE end_station_latitude NOT IN ('#N/A', '') AND end_station_longitude NOT IN ('#N/A', '') AND end_station_elevation IS NULL LIMIT 500 """

# Sets the elevation of every start station matching a latitude/longitude pair.
# Positional parameters, in order: elevation, latitude, longitude.
UPDATE_START_STATION_QUERY = """ UPDATE start_station SET start_station_elevation = ? WHERE start_station_latitude = ? AND start_station_longitude = ? """
def set_config(application):
    """Store the result of ``get_config()`` under ``application['config']``.

    Args:
        application: Object supporting item assignment; it is modified in
            place.
    """
    loaded_config = get_config()
    application['config'] = loaded_config
default=False, help="Start as webservice") args = parser.parse_args() def cli(): minifyier = Minifyer(Base64EncoderStrategy()) actions_mapping = { "minify": minifyier.minify, "deminify": minifyier.deminify } result = actions_mapping[args.action](args.url) print(result) def start_web_server(): print("Start Flask") start() if __name__ == "__main__": get_arguments() get_config(args.config) if args.http: start_web_server() else: cli()