def main():
    """Extract flight rows from the document ``argv[1]`` into CSV ``argv[2]``.

    Every page yielded by ``read_pages`` is reduced to its text elements,
    clipped with ``bbox`` and split into layout columns.  Within a column,
    rows whose first cell starts with ``TO`` / ``FROM`` update the current
    airport pair, heading and notice rows are skipped, and every remaining
    row is written as ``from_airport,to_airport,<row cells...>``.

    Cells are joined with plain commas; no CSV quoting is applied.

    NOTE(review): ``main`` is defined a second time later in this file, so
    the later definition replaces this one when the module is loaded.
    """
    # Exact first-cell values that mark table heading rows.
    heading_cells = ('From-To', 'Validity')
    # Exact third-cell values that mark notice rows.
    notice_cells = (
        'Consult your travel agent for details',
        'Schedules continue on following page',
    )

    # NOTE(review): the (path, mode, encoding) call shape implies ``open`` is
    # ``codecs.open`` -- confirm against the file's imports.
    # The ``with`` block guarantees the output is flushed and closed even if
    # parsing raises part-way through.
    with open(argv[2], 'w', 'utf-8') as csv_out:
        for page in read_pages(argv[1], START_PAGE):
            elements = bbox(text_elements(page), top=752)
            for col in layout_columns(elements, LAYOUT_COLS):
                # The airport pair is tracked per layout column.
                to_airport = ''
                from_airport = ''
                for flight in data_columns(col, FLIGHT_COLS):
                    first = flight[0]
                    if first.startswith('TO'):
                        to_airport = airport_code(first)
                        continue
                    if first.startswith('FROM'):
                        from_airport = airport_code(first)
                        continue
                    if first in heading_cells:
                        continue
                    # "Operated By" text may land in cell 2, 3 or 1; the
                    # cells are probed in that order.
                    if any(flight[i].startswith('Operated By')
                           for i in (2, 3, 1)):
                        continue
                    if flight[2] in notice_cells:
                        continue
                    if first.startswith('('):
                        continue
                    # No usable origin code: the row cannot be attributed.
                    if from_airport is None:
                        continue
                    # ``airport_code`` can evidently return None (see the
                    # guard above); write an empty destination rather than
                    # letting ``join`` raise TypeError on a None item.
                    row = [from_airport, to_airport or ''] + flight
                    csv_out.write(','.join(row) + '\n')
# Annotation rows that qualify the flight row printed above them.  The
# pattern text is unchanged from the inline versions (note the unescaped
# '.' after "Eff" / "Disc" matches any character); they are only compiled
# once and written as raw strings.
_OPERATED_BY_RE = re.compile(r'Operated By (.+?)( For .+|;.+)?$')
_ABOVE_EFF_RE = re.compile(r'Above Eff. (\d+/\d+)(?: thru (\d+/\d+))?$')
_ABOVE_DISC_RE = re.compile(r'Above Disc. (\d+/\d+)$')
_ABOVE_OPS_RE = re.compile(r'Above Ops (\d+/\d+) Only$')


def _new_pending():
    """Return an empty buffer: no flight row and blank annotations."""
    return {
        'flight': None,
        'operated_by': '',
        'effective_from': '',
        'effective_to': '',
    }


def _apply_annotation(all_text, pending):
    """Record an annotation row in *pending*; return True if it was one."""
    match = _OPERATED_BY_RE.match(all_text)
    if match:
        pending['operated_by'] = match.group(1)
        return True
    match = _ABOVE_EFF_RE.match(all_text)
    if match:
        # The "thru" part is optional, so effective_to may be None here.
        pending['effective_from'], pending['effective_to'] = match.groups()
        return True
    match = _ABOVE_DISC_RE.match(all_text)
    if match:
        pending['effective_to'] = match.group(1)
        return True
    match = _ABOVE_OPS_RE.match(all_text)
    if match:
        pending['effective_from'] = pending['effective_to'] = match.group(1)
        return True
    return False


def _write_pending(csv_out, from_airport, to_airport, pending):
    """Write the buffered flight row, if any, and reset the buffer.

    When no flight is buffered nothing is written and the annotations are
    left untouched.
    """
    flight = pending['flight']
    if not flight:
        return
    prefix = [
        # Airports are None until their first header row is seen; emit an
        # empty field instead of letting ``join`` raise TypeError.
        from_airport or '',
        to_airport or '',
        pending['operated_by'],
        pending['effective_from'],
        pending['effective_to'] or '',
    ]
    csv_out.write(','.join(prefix + flight) + '\n')
    pending.update(_new_pending())


def main():
    """Extract flight rows from the document ``argv[1]`` into CSV ``argv[2]``.

    Every page yielded by ``read_pages`` is reduced to its text elements,
    clipped with ``bbox`` and split into layout columns.  Rows are then
    classified as:

    * a destination header -- first cell starts with ``'To '`` and
      ``airport_code`` finds a code in the row text;
    * an origin header -- the fifth cell matches ``AIRPORT_CODE_RE``;
    * an annotation ("Operated By ...", "Above Eff. ...", "Above Disc. ...",
      "Above Ops ... Only") that applies to the flight row above it;
    * anything else, which is treated as a flight row.

    Because annotations follow the flight they describe, each flight row is
    buffered and written only once the next flight row, the next airport
    header, or the end of the layout column is reached.  Output rows are
    ``from_airport,to_airport,operated_by,effective_from,effective_to``
    followed by the flight's own cells, joined with plain commas (no CSV
    quoting is applied).

    The airport pair carries over across columns and pages; the flight
    buffer does not.
    """
    # NOTE(review): the (path, mode, encoding) call shape implies ``open`` is
    # ``codecs.open`` -- confirm against the file's imports.
    # The ``with`` block guarantees the output is flushed and closed even if
    # parsing raises part-way through.
    with open(argv[2], 'w', 'utf-8') as csv_out:
        to_airport = None
        from_airport = None
        for page in read_pages(argv[1], START_PAGE):
            elements = bbox(text_elements(page), top=735, bottom=31)
            for col in layout_columns(elements, LAYOUT_COLS):
                pending = _new_pending()
                for flight in data_columns(col, FLIGHT_COLS, 1):
                    all_text = ' '.join(flight).strip()

                    if flight[0].startswith('To '):
                        airport = airport_code(all_text)
                        if airport:
                            # Flush first so the buffered flight keeps the
                            # airport pair it was listed under.
                            _write_pending(csv_out, from_airport,
                                           to_airport, pending)
                            to_airport = airport
                            continue
                        # NOTE(review): a 'To ' row without an airport code
                        # falls through and is classified like any other
                        # row -- confirm this is intended.

                    match = AIRPORT_CODE_RE.match(flight[4])
                    if match:
                        # Same reasoning as for the destination header.
                        _write_pending(csv_out, from_airport,
                                       to_airport, pending)
                        from_airport = match.group(1)
                        continue

                    if _apply_annotation(all_text, pending):
                        continue

                    # A new flight row: emit the previous one together with
                    # the annotations collected since, then buffer this one.
                    _write_pending(csv_out, from_airport, to_airport, pending)
                    pending['flight'] = flight

                # Without this the last flight of every column is dropped,
                # since the buffer is recreated for the next column.
                _write_pending(csv_out, from_airport, to_airport, pending)