示例#1
0
文件: anz.py 项目: baicaicai/aeroster
    def _parse_roster_table(self):
        base_tz = timezone(self.roster_values.get('timezone')) or self.default_timezone
        base_iata = self.roster_values.get('base').iata or self.default_base
        roster_table_lines = re.findall(r'│(.*)│', self.roster_text_string)
        roster_table = etl.fromcsv(
            source=sources.MemorySource('\n'.join(roster_table_lines).encode('UTF-8')),
            delimiter='│'
        )
        self.staging_table = roster_table
        table1 = (
            # add header
            roster_table.pushheader(
                ['base_date', 'local_date', 'duty', 'activity', 'role', 'sector', 'pickup_time',
                 'rep_fin_time', 'etd_time', 'eta_time', 'flight_time', 'duty_time', 'on_city', 'utc_dev',
                 'dea', 'notes']
            )
            # select rows that match date pattern or whitespace in first column
            .search(0, '(?:' + self.wkdaydatepattern + ')|(?:\A\s+)')
            # split report and finish times (rep/fin) into separate columns
            .capture('rep_fin_time', '(.{4}).(.{4})', ['report_time', 'finish_time'])
            # split airport-from and airport-to (sector) into separate columns
            .capture('sector', '(.{3}).+(.{3})', ['sector_from', 'sector_to'])
            # remove whitespace from all values and convert empty values to None
            .convertall(lambda v: None if v.strip() == '' else v.strip())
            # select only rows that contain data
            # .select(lambda row: row.count(None) != len(row))
            .select(lambda row: list(OrderedDict.fromkeys(list(row)[2:])) == [None], complement=True)
        )
        table2 = (
            table1
            # copy base_date to local_date if local_date is None
            .convert('local_date',
                     lambda v, row: row['base_date'],
                     where=lambda row: row['local_date'] is None,
                     pass_row=True)
            .filldown('local_date')
            # convert local_date to datetime
            .convert('local_date', self._convert_weekday_datestring_to_date)
            # convert time fields to time type
            .convert(('report_time', 'finish_time', 'etd_time', 'eta_time', 'pickup_time'),
                     timeparser(self.timeformat))
            # combine date and times to create report/finish/etd/eta/pickup
            .addfield('report', lambda rec: datetime.combine(rec['local_date'],
                      rec['report_time']) if rec['report_time'] is not None else None)
            .addfield('finish', lambda rec: datetime.combine(rec['local_date'],
                      rec['finish_time']) if rec['finish_time'] is not None else None)
            .addfield('etd', lambda rec: datetime.combine(rec['local_date'],
                      rec['etd_time']) if rec['etd_time'] is not None else None)
            .addfield('eta', lambda rec: datetime.combine(rec['local_date'],
                      rec['eta_time']) if rec['eta_time'] is not None else None)
            .addfield('pickup', lambda rec: datetime.combine(rec['local_date'],
                      rec['pickup_time']) if rec['pickup_time'] is not None else None)
            # remove time fields now that we've combined them into a single datetime instance
            .cutout('report_time', 'finish_time', 'etd_time', 'eta_time', 'pickup_time')
            # fill down report_time until it finds it matches the first flight of the duty or the duty ends
            .filldown('report', until=lambda r: r['etd'] is not None or r['finish'] is not None)
            # copy sector_to
            .addfield('finish_airport', lambda rec: rec['sector_to'])
            .filldown('finish_airport', until=lambda r: r['finish'] is not None)
        )
        table3 = (
            table2
            # get timezone of report/finish/etd/eta/pickup location
            .addfield('report_tz', lambda rec: str(self.get_tz(iata_code=rec['sector_from']))
                      or str(base_tz) if rec['report'] is not None else None)
            .addfield('finish_tz', lambda rec: str(self.get_tz(iata_code=rec['finish_airport']))
                      or str(base_tz) if rec['finish'] is not None else None)
            .addfield('etd_tz', lambda rec: str(self.get_tz(iata_code=rec['sector_from']))
                      or str(base_tz) if rec['etd'] is not None else None)
            .addfield('eta_tz', lambda rec: str(self.get_tz(iata_code=rec['sector_to']))
                      or str(base_tz) if rec['eta'] is not None else None)
            .addfield('pickup_tz', lambda rec: str(self.get_tz(iata_code=rec['sector_from']))
                      or str(base_tz)if rec['pickup'] is not None else None)
            # make datetimes aware
            .convert('report',
                     lambda v, row: timezone(row['report_tz']).localize(v).astimezone(utc),
                     pass_row=True,
                     where=lambda r: r.report is not None)
            .convert('finish',
                     lambda v, row: timezone(row['finish_tz']).localize(v).astimezone(utc),
                     pass_row=True,
                     where=lambda r: r.finish is not None)
            .convert('etd',
                     lambda v, row: timezone(row['etd_tz']).localize(v).astimezone(utc),
                     pass_row=True,
                     where=lambda r: r.etd is not None)
            .convert('eta',
                     lambda v, row: timezone(row['eta_tz']).localize(v).astimezone(utc),
                     pass_row=True,
                     where=lambda r: r.eta is not None)
            .convert('pickup',
                     lambda v, row: timezone(row['pickup_tz']).localize(v).astimezone(utc),
                     pass_row=True,
                     where=lambda r: r.pickup is not None)
        )
        table4 = (
            table3
            # ignore rows that only contain on_city information
            .select(lambda row: self._all_fields_empty_except('on_city', row), complement=True)
        )
        table5 = (
            table4
            # find and fill down where duty repeats
            .filldown('duty',
                      where=self._next_duty_rep_found,
                      until=lambda r: r['finish_airport'] == base_iata and r['finish'] is not None)
            # # fill down report until it finds its corresponding finish entry
            .filldown('duty', 'report', 'report_tz', 'pickup', 'pickup_tz',
                      until=lambda r: r['finish'] is not None)
            # fill down Duty and Activity if notes span multiple lines
            .filldown(where=lambda row: self._all_fields_empty_except('notes', row),
                      until=lambda r: r['notes'] is None)
            # fill down etd until it finds its corresponding eta entry
            .filldown('activity', 'role', 'sector_from', 'etd', 'etd_tz',
                      where=lambda r: r['activity'] is None,
                      until=lambda r: r['eta'] is not None)
            # fill duty with activity value if duty is None
            .filldown('activity')
            .convert('duty',
                     lambda v, row: row['activity'],
                     where=lambda row: row['duty'] is None and row['activity'] is not None,
                     pass_row=True)
        ).cache()
        self.table1 = table1
        self.table2 = table2
        self.table3 = table3
        self.table4 = table4
        self.table5 = table5

        return table5
示例#2
0
from petl import dateparser

isodate = dateparser('%Y-%m-%d')
isodate('2002-12-25')
try:
    isodate('2002-02-30')
except ValueError as e:
    print(e)

# timeparser()
##############

from petl import timeparser

isotime = timeparser('%H:%M:%S')
isotime('00:00:00')
isotime('13:00:00')
try:
    isotime('12:00:99')
except ValueError as e:
    print(e)

try:
    isotime('25:00:00')
except ValueError as e:
    print(e)

# boolparser()
##############
示例#3
0
print(etl.valuecounter(table, 'status_code'))
table = clean_up(table, 'sta_cd')

######
print('CONVERT AND RENAME rep_nm to reported_by_name')
table = etl.addfield(table, 'reported_by_name', lambda x: x['rep_nm'])
table = clean_up(table, 'rep_nm')

######
print('CONVERT AND RENAME descript to incident_description')
table = etl.addfield(table, 'incident_description', lambda x: x['descript'])
table = clean_up(table, 'descript')

######
print('COMBINING occ_dt and occ_tm into incident_timestamp')
table = etl.convert(table, 'occ_tm', timeparser('%H:%M'))
table = etl.addfield(
    table, 'incident_timestamp', lambda x: datetime.combine(
        x['occ_dt'], (x['occ_tm'] or time(0, 0))) + timedelta(hours=8))

debug(table, ['occ_dt', 'occ_tm', 'incident_timestamp'])
table = clean_up(table, 'occ_dt')
table = clean_up(table, 'occ_tm')

#####
print("CREATING mine_incident_id_year from incident_timestamp")
table = etl.addfield(table, 'mine_incident_id_year',
                     lambda x: x['incident_timestamp'].year)
print(etl.valuecounter(table, 'mine_incident_id_year'))

######
示例#4
0
文件: parsers.py 项目: DeanWay/petl
##############

from petl import dateparser
isodate = dateparser('%Y-%m-%d')
isodate('2002-12-25')
try:
    isodate('2002-02-30')
except ValueError as e:
    print(e)


# timeparser()
##############

from petl import timeparser
isotime = timeparser('%H:%M:%S')
isotime('00:00:00')
isotime('13:00:00')
try:
    isotime('12:00:99')
except ValueError as e:
    print(e)

try:
    isotime('25:00:00')
except ValueError as e:
    print(e)


# boolparser()
##############