Example #1
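This snippet is a single function taken from a larger module; helpers such as get_wsdl_client, update_configs, validate_multiple_yyyymmdd_range, get_data_for_config, insert_data, move_data, DB and LOGGER are defined elsewhere in that module. A minimal sketch of the imports the function itself relies on, assuming a logging-based LOGGER, PyGreSQL's DB class and psycopg2's IntegrityError (the database and exception classes are guesses based on how they are used):

import configparser
import datetime
import logging
import time
from datetime import date
from itertools import product
from typing import List

from dateutil.relativedelta import relativedelta
from pg import DB                      # assumption: PyGreSQL, given .query_formatted()/.getresult()
from psycopg2 import IntegrityError    # assumption: psycopg2 raises the IntegrityError caught below

LOGGER = logging.getLogger(__name__)
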
def main(dbsetting: 'path/to/config.cfg' = None,
         years: '[[YYYYMMDD, YYYYMMDD]]' = None,
         direct: bool = None,
         live: bool = False,
         analysis: List[int] = []):
    """
    Main method. Connect to blip WSDL Client, update analysis configurations,
    then pull data for specified dates
        :param dbsetting=None: 
            Path to the config.cfg file
        :param years=None: 
            List of [[YYYYMMDD, YYYYMMDD]] pairs
        :param direct=None: 
            Specify to ignore the HTTPS_PROXY environment variable.
    """

    config = configparser.ConfigParser()
    config.read(dbsetting)
    dbset = config['DBSETTINGS']
    api_settings = config['API']

    # Access the API using zeep. Note that this rebinds `config` from the
    # ConfigParser object to the analysis configuration returned by the client.
    LOGGER.info('Fetching config from blip server')
    blip, config = get_wsdl_client(api_settings['WSDLfile'], direct, live)

    if live:
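        # Live mode: pull only the analyses mapped to king_pilot.bt_segments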
        db = DB(**dbset)
        query = db.query(
            "SELECT analysis_id, report_name from king_pilot.bt_segments INNER JOIN bluetooth.all_analyses USING(analysis_id)"
        )
        routes_to_pull = {
            analysis_id: dict(report_name=report_name)
            for analysis_id, report_name in query.getresult()
        }
    else:
        # Querying data that's been further processed overnight
        if not analysis:
            # list of all route segments
            all_analyses = blip.service.getExportableAnalyses(
                api_settings['un'], api_settings['pw'])
            LOGGER.info('Updating route configs')
            routes_to_pull = update_configs(all_analyses, dbset)
        else:
            db = DB(**dbset)
            LOGGER.info(
                'Fetching info on the following analyses from the database: %s',
                analysis)
            sql = '''WITH analyses AS (SELECT unnest(%(analysis)s::bigint[]) AS analysis_id)
                    SELECT analysis_id, report_name FROM bluetooth.all_analyses INNER JOIN analyses USING(analysis_id)'''
            query = db.query_formatted(sql, {'analysis': analysis})
            routes_to_pull = {
                analysis_id: dict(report_name=report_name)
                for analysis_id, report_name in query.getresult()
            }
        date_to_process = None

    if years is None and live:
        date_to_process = datetime.datetime.now().replace(minute=0,
                                                          second=0,
                                                          microsecond=0)
        years = {date_to_process.year: [date_to_process.month]}
    elif years is None:
        # Default to yesterday's date to determine which day/month to process
        date_to_process = date.today() + relativedelta(days=-1)
        years = {date_to_process.year: [date_to_process.month]}
    else:
        # Validate the provided [[YYYYMMDD, YYYYMMDD]] date ranges
        years = validate_multiple_yyyymmdd_range(years)

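    # Pull data for every analysis for each requested month. Note that the loop
    # variable `analysis` shadows the `analysis` parameter, which is only used above.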
    for year in years:
        for (analysis_id, analysis), month in product(routes_to_pull.items(),
                                                      years[year]):
            if date_to_process is None:
                days = list(years[year][month])
                LOGGER.info('Reading from: %s y: %s m: %s days: %s-%s',
                            analysis['report_name'], str(year), str(month),
                            str(days[0]), str(days[-1]))
                config.startTime = datetime.datetime(year, month, days[0], 0,
                                                     0, 0)
            elif live:
                LOGGER.info('Reading from: %s at %s ', analysis['report_name'],
                            date_to_process.strftime('%Y-%m-%d %H:00'))
                config.startTime = date_to_process + relativedelta(hours=-1)
                config.endTime = date_to_process
            else:
                days = [date_to_process]

                config.startTime = datetime.datetime.combine(
                    date_to_process, datetime.datetime.min.time())
                LOGGER.info('Reading from: %s for %s ',
                            analysis['report_name'],
                            date_to_process.strftime('%Y-%m-%d'))

            config.analysisId = analysis_id
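            # Accumulate the records returned by the API so they can be inserted in one call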
            objectList = []
            if live:
                objectList.extend(
                    get_data_for_config(blip, api_settings['un'],
                                        api_settings['pw'], config))
            else:
                # Split each day into four consecutive pulls of 8, 6, 5 and 5 hours
                window_hours = [8, 6, 5, 5]
                for _, hours in product(days, window_hours):
                    delta = datetime.timedelta(hours=hours)
                    config.endTime = config.startTime + delta

                    objectList.extend(
                        get_data_for_config(blip, api_settings['un'],
                                            api_settings['pw'], config))

                    # Advance the window start before pausing briefly between requests
                    config.startTime = config.startTime + delta
                    time.sleep(1)
            try:
                insert_data(objectList, dbset, live)
            except OSError as ose:
                LOGGER.error('Inserting data failed')
                LOGGER.error(str(ose))
            except ValueError as valu:
                LOGGER.error('Unsupported Value in insert')
                LOGGER.error(str(valu))
            except IntegrityError:
                LOGGER.warning(
                    'Insert violated table constraints, likely duplicate data')

    if not live:
        LOGGER.info('Moving raw data to observations.')

        move_data(dbset)

    LOGGER.info('Processing Complete.')
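For reference, a sketch of what the config.cfg read by this function might contain. The section names (DBSETTINGS, API) and the keys WSDLfile, un and pw appear in the code; the DBSETTINGS keys are assumptions, since that section is passed straight through as DB(**dbset):

[DBSETTINGS]
host=localhost
dbname=bluetooth
user=bt_user
password=changeme

[API]
WSDLfile=path/to/blip.wsdl
un=api_user
pw=api_password

A hypothetical invocation pulling one month of data for two specific analyses (the date-range format follows the `years` annotation; whether the values are strings or integers depends on validate_multiple_yyyymmdd_range):

main(dbsetting='config.cfg',
     years=[['20180101', '20180131']],
     analysis=[1234567, 1234568])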