Exemplo n.º 1
0
# A zip archive counts as processed once a '<archive name>.csv' file sits
# next to it, so strip '.csv' from each CSV name and subtract those names
# from the set of zip names.
zip_files = glob('*.zip')
csv_files = glob('*.csv')
already_extracted = {csv_name.replace('.csv', '') for csv_name in csv_files}
unprocessed_files = sorted(set(zip_files).difference(already_extracted))
log.info('Number of unprocessed files: %s', len(unprocessed_files))

for new_file in unprocessed_files:
    # Step 1: run the extraction script on the zip archive.
    log.info('Extracting helicopter data from zip file: %s', new_file)
    extract_script = os.path.join(scripts_root, 'process_file.sh')
    return_code = subprocess.call([extract_script, new_file])
    if return_code != 0:
        # A failure is only logged; the remaining steps still run.
        log.warning('Extraction of file failed with return code: %s',
                    return_code)

    # Step 2: empty the raw_data staging table before loading this file.
    log.info('Truncating raw_data table')
    dal = DataAccessLayer()
    dal.engine.execute('truncate raw_data')
    dal.session.commit()

    # Step 3: hand the CSV named after the zip to the load script.
    csv_path = new_file + '.csv'
    log.info('Loading %s into the database.', csv_path)
    load_script = os.path.join(scripts_root, 'copy_and_add.sh')
    return_code = subprocess.call([load_script, csv_path])
    if return_code != 0:
        log.warning('Adding file data to database failed with return code: %s',
                    return_code)

    # Step 4: move the staged rows into the normalised tables.
    log.info('Normalising raw data')
    process_raw_data()

    # The zip file name without its '.zip' part is parsed as a date.
    file_date = dateutil.parser.parse(new_file.replace('.zip', ''))
Exemplo n.º 2
0
def process_raw_data():
    """Copy the rows staged in ``raw_data`` into the normalised tables.

    Three passes are made over ``raw_data``:

    1. Operators whose name has no match in ``operator`` are inserted.
    2. Helicopters whose raw ``Id`` has no match in
       ``helicopter.helicopter_data_source_id`` are inserted, linked to
       their operator by operator name.
    3. One ``position_reading`` row is inserted for every raw row that
       joins to a helicopter.

    Nothing is inserted in a pass that finds no rows. Returns ``None``.
    """
    dal = DataAccessLayer()

    RawData = dal.tbls['raw_data']
    Operator = dal.tbls['operator']
    Helicopter = dal.tbls['helicopter']
    PositionReading = dal.tbls['position_reading']

    # Get new operators.
    # Anti-join: the outer join leaves operator columns NULL for raw rows
    # without a matching operator, and the filter keeps only those rows.
    # `.is_(None)` renders `IS NULL`, the same SQL as `== None`.
    operator_details = (
        dal.session.query(RawData)
        .outerjoin(Operator, Operator.c.operator_name == RawData.c.Op)
        .filter(Operator.c.operator_name.is_(None))
        .group_by(RawData.c.Op, RawData.c.Cou)
        .all()
    )

    if operator_details:
        log.info('New operators found: %s', [op.Op for op in operator_details])
        dal.engine.execute(Operator.insert(), [{
            'operator_name': op.Op,
            'is_military': None,
            'operator_country': op.Cou,
        } for op in operator_details])
        dal.session.commit()
    else:
        log.info('No new operators found.')

    # Get new helicopters.
    # Same anti-join against the helicopter table, plus an inner join to
    # operator so each result row also carries the operator's columns.
    helicopter_details = (
        dal.session.query(RawData, Operator)
        .outerjoin(Helicopter,
                   Helicopter.c.helicopter_data_source_id == RawData.c.Id)
        .filter(Helicopter.c.helicopter_data_source_id.is_(None))
        .join(Operator, Operator.c.operator_name == RawData.c.Op)
        .group_by(RawData.c.Id, RawData.c.Icao, RawData.c.Reg,
                  RawData.c.Type, RawData.c.Mdl)
        .all()
    )

    if helicopter_details:
        log.info('New helicopters found: %s',
                 [h.Icao for h in helicopter_details])
        dal.engine.execute(Helicopter.insert(),
                           [{
                               'helicopter_data_source_id': h.Id,
                               'icao': h.Icao,
                               'registration': h.Reg,
                               'helicopter_type': h.Type,
                               'helicopter_model': h.Mdl,
                               'helicopter_operator_id': h.operator_id,
                           } for h in helicopter_details])
        dal.session.commit()
    else:
        log.info('No new helicopters found.')

    # Get new position readings: every raw row that joins to a helicopter.
    position_readings = (
        dal.session.query(RawData, Helicopter)
        .join(Helicopter,
              Helicopter.c.helicopter_data_source_id == RawData.c.Id)
        .all()
    )

    log.info('Inserting %s new position readings into the database',
             len(position_readings))
    # Guarded like the two inserts above: an empty parameter list is not
    # "insert nothing" to SQLAlchemy but a parameterless execute, which
    # would attempt to insert a single row of default values.
    if position_readings:
        dal.engine.execute(PositionReading.insert(), [{
            'helicopter_id': r.helicopter_id,
            'latitude': r.Lat,
            'longitude': r.Long,
            'altitude': r.Alt,
            'barometric_altitude': r.GAlt,
            'speed': r.Spd,
            'bearing': r.Trak,
            'time_stamp': r.TimeStamp,
        } for r in position_readings])
    dal.session.commit()