示例#1
0
def _is_noise_row(flight):
    """Return True for rows that are dropped instead of written as flights.

    The checks run in the same order as the original inline chain, so a row
    that is too short to index fails at exactly the same cell as before.
    """
    return (
        flight[0] in ('From-To', 'Validity')
        or flight[2].startswith('Operated By')
        or flight[3].startswith('Operated By')
        or flight[1].startswith('Operated By')
        or flight[2] in (
            'Consult your travel agent for details',
            'Schedules continue on following page',
        )
        or flight[0].startswith('(')
    )


def main():
    """Write one CSV line per flight row found in the input document.

    Pages are read from ``argv[1]`` starting at ``START_PAGE``; every page is
    reduced to its text elements, clipped with ``bbox(..., top=752)`` and
    split into layout columns.  Within a column, rows whose first cell starts
    with ``TO`` / ``FROM`` update the current airport codes (via
    ``airport_code``); every remaining non-noise row is written to
    ``argv[2]`` as ``from_airport,to_airport,<row cells...>``.
    """
    # NOTE(review): the (name, mode, encoding) positional call suggests that
    # ``open`` here is ``codecs.open`` -- confirm against the file's imports.
    # The ``with`` block guarantees the output is flushed and closed even if
    # a page fails to parse part-way through.
    with open(argv[2], 'w', 'utf-8') as csv_out:
        for page in read_pages(argv[1], START_PAGE):
            elements = bbox(text_elements(page), top=752)
            for col in layout_columns(elements, LAYOUT_COLS):
                # Airport context does not carry over between layout columns.
                to_airport = ''
                from_airport = ''
                for flight in data_columns(col, FLIGHT_COLS):
                    header = flight[0]
                    if header.startswith('TO'):
                        to_airport = airport_code(header)
                        continue
                    if header.startswith('FROM'):
                        from_airport = airport_code(header)
                        continue
                    if _is_noise_row(flight):
                        continue

                    # Only reachable when airport_code() returned None for a
                    # FROM header; such rows have no usable origin.
                    if from_airport is None:
                        continue

                    row = [from_airport, to_airport] + flight
                    # Equivalent to ``print >> csv_out, ...`` (text + newline)
                    # without relying on the Python 2 print statement.
                    csv_out.write(','.join(row) + '\n')
示例#2
0
def main():
    """Write flight rows, with operator and validity annotations, as CSV.

    Pages are read from ``argv[1]`` starting at ``START_PAGE`` and each page
    is clipped with ``bbox(..., top=735, bottom=31)`` and split into layout
    columns.  Rows inside a column are classified as:

    * a ``To `` header, which updates ``to_airport``;
    * a row whose fifth cell matches ``AIRPORT_CODE_RE``, which updates
      ``from_airport``;
    * an annotation row (``Operated By`` / ``Above Eff.`` / ``Above Disc.`` /
      ``Above Ops``), which is attached to the pending flight;
    * anything else, which is treated as a flight row.

    A flight row is written to ``argv[2]`` only when the next flight row
    arrives, so that the annotation rows following it can be collected first.

    NOTE(review): ``csv_out`` is never closed in the visible code.
    NOTE(review): in the visible code the last pending flight of every column
    is never written (``this_flight`` is reset at the start of each column and
    nothing flushes it after the row loop) -- confirm whether a flush is
    missing or lives further down.
    """
    # NOTE(review): the (name, mode, encoding) call suggests ``open`` is
    # ``codecs.open`` -- confirm against the file's imports.
    csv_out = open(argv[2], 'w', 'utf-8')

    # Airport context lives outside the page loop, so it carries over from one
    # column/page to the next until a new header replaces it.
    to_airport = None
    from_airport = None
    for pg, page in enumerate(read_pages(argv[1], START_PAGE)):
        elements = text_elements(page)
        elements = bbox(elements, top=735, bottom=31)
        columns = layout_columns(elements, LAYOUT_COLS) 
        for i, col in enumerate(columns):
            # Per-column state: the flight waiting to be written and the
            # annotations collected for it so far.
            this_flight = None
            operated_by = ''
            effective_from = ''
            effective_to = ''
            for flight in data_columns(col, FLIGHT_COLS, 1):
                # Whole row as one string; the annotation patterns below are
                # matched against this rather than against individual cells.
                all_text = ' '.join(flight).strip()
                #print all_text

                # Destination header: keep the previous destination when
                # airport_code() returns a falsy value.
                if flight[0].startswith('To '):
                    airport = airport_code(all_text)
                    if airport:
                        to_airport = airport
                    continue

                # Set to True when the row turns out to be an annotation for
                # the pending flight rather than a flight of its own.
                cnt = False

                # Origin header: recognised by the fifth cell of the row.
                match = AIRPORT_CODE_RE.match(flight[4])
                if match:
                    from_airport = match.groups()[0]
                    continue
                # Operator name, without an optional trailing " For ..." or
                # ";..." part.
                match = re.match('Operated By (.+?)( For .+|;.+)?$', all_text)
                if match:
                    operated_by = match.groups()[0]
                    cnt = True
                # Start date with an optional "thru" end date; the second
                # group is None when "thru" is absent.
                match = re.match('Above Eff. (\d+/\d+)(?: thru (\d+/\d+))?$', all_text)
                if match:
                    effective_from, effective_to = match.groups()
                    cnt = True
                # End date only.
                match = re.match('Above Disc. (\d+/\d+)$', all_text)
                if match:
                    effective_to = match.groups()[0]
                    cnt = True
                # Single-day operation: start and end are the same date.
                match = re.match('Above Ops (\d+/\d+) Only$', all_text)
                if match:
                    effective_from = effective_to = match.groups()[0]
                    cnt = True

                if cnt:
                    continue
                
                # A new flight row has arrived: write out the previous one
                # together with the annotations gathered since it was seen.
                if this_flight:
                    # "Above Eff." without "thru" leaves effective_to as None,
                    # which str.join() would reject.
                    if effective_to is None:
                        effective_to = ''
                    # NOTE(review): from_airport / to_airport are still None
                    # if no header row has been seen yet, which would make the
                    # join below raise TypeError -- verify against real input.
                    print >> csv_out, ','.join([
                        #str(pg),
                        #str(i),
                        from_airport,
                        to_airport,
                        operated_by,
                        effective_from,
                        effective_to
                    ] + this_flight)
                    # Annotations belong to one flight only.
                    operated_by = ''
                    effective_from = ''
                    effective_to = ''
                # The current row becomes the pending flight.
                this_flight = flight