Example #1
    def gen_rows(self, readers, prefixes):
        conn = self._conn
        cur = conn.cursor()
        for reader, prefix in zip(readers, prefixes):
            for row in reader:
                date = row['date']
                date_str = '%s-%s-%s' % (date[:4], date[4:6], date[6:8])
                service_id = prefix + row['service_id']
                # We need to find the service_I for this service_id.  To
                # do that we check the calendar table, since that (and
                # only that) is the authoritative list of service_ids.
                service_I = cur.execute(
                    'SELECT service_I FROM calendar WHERE service_id=?',
                    (decode_six(service_id), )).fetchone()
                if service_I is None:
                    # We have to add a new fake row in order to get a
                    # service_I.  calendar is *the* authoritative source
                    # for service_I values.
                    cur.execute(
                        'INSERT INTO calendar '
                        '(service_id, m,t,w,th,f,s,su, start_date,end_date)'
                        'VALUES (?, 0,0,0,0,0,0,0, ?,?)',
                        (decode_six(service_id), date_str, date_str))
                    service_I = cur.execute(
                        'SELECT service_I FROM calendar WHERE service_id=?',
                        (decode_six(service_id), )).fetchone()
                service_I = service_I[0]  # row tuple -> int

                yield dict(
                    service_I=int(service_I),
                    date=date_str,
                    exception_type=int(row['exception_type']),
                )
Example #2
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(
                 _stop_id=prefix + decode_six(row['stop_id']),
                 _trip_id=prefix + decode_six(row['trip_id']),
                 arr_time=row['arrival_time'],
                 dep_time=row['departure_time'],
                 seq=int(row['stop_sequence']),
             )
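Because gen_rows is a generator, a loader can stream the transformed rows straight into SQLite without building a list first. A minimal sketch of that consumption, assuming a stop_times table whose columns match the yielded dict keys (the method name and INSERT statement here are illustrative, not the library's actual ones):

def insert_rows(self, conn, readers, prefixes):
    # Illustrative consumer: named :placeholders are filled from each
    # yielded dict; executemany drains the generator lazily.
    cur = conn.cursor()
    cur.executemany(
        'INSERT INTO stop_times (_stop_id, _trip_id, arr_time, dep_time, seq) '
        'VALUES (:_stop_id, :_trip_id, :arr_time, :dep_time, :seq)',
        self.gen_rows(readers, prefixes))
    conn.commit()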
Example #3
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(
                 _from_stop_id=prefix + decode_six(row['from_stop_id']).strip(),
                 _to_stop_id=prefix + decode_six(row['to_stop_id']).strip(),
                 transfer_type=int(row['transfer_type']),
                 min_transfer_time=int(row['min_transfer_time'])
                 if row.get('min_transfer_time', '').strip() else None,
             )
Example #4
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(shape_id=prefix + decode_six(row['shape_id']),
                        lat=float(row['shape_pt_lat']),
                        lon=float(row['shape_pt_lon']),
                        seq=int(row['shape_pt_sequence']))
Example #5
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(
                 _trip_id=prefix + decode_six(row['trip_id']),
                 start_time=row['start_time'],
                 end_time=row['end_time'],
                 headway_secs=int(row['headway_secs']),
                 exact_times=int(row['exact_times'])
                 if row.get('exact_times', '').isdigit() else 0)
Example #6
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             start = row.get('feed_start_date')
             end = row.get('feed_end_date')
             yield dict(
                 feed_publisher_name=decode_six(row['feed_publisher_name'])
                 if 'feed_publisher_name' in row else None,
                 feed_publisher_url=decode_six(row['feed_publisher_url'])
                 if 'feed_publisher_url' in row else None,
                 feed_lang=decode_six(row['feed_lang'])
                 if 'feed_lang' in row else None,
                 feed_start_date='%s-%s-%s' %
                 (start[:4], start[4:6], start[6:8]) if start else None,
                 feed_end_date='%s-%s-%s' %
                 (end[:4], end[4:6], end[6:8]) if end else None,
                 feed_version=decode_six(row['feed_version'])
                 if 'feed_version' in row else None,
                 feed_id=prefix[:-1] if len(prefix) > 0 else prefix)
Example #7
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             assert row['arrival_time'] != "", \
                 "Some stop_times entries are missing arrival_time information."
             assert row['departure_time'] != "", \
                 "Some stop_times entries are missing departure_time information."
             assert row['stop_sequence'] != "", \
                 "Some stop_times entries are missing stop_sequence information."
             assert row['stop_id'] != "", \
                 "Some stop_times entries are missing stop_id information."
             assert row['trip_id'] != "", \
                 "Some stop_times entries are missing trip_id information."
             yield dict(
                 _stop_id=prefix + decode_six(row['stop_id']),
                 _trip_id=prefix + decode_six(row['trip_id']),
                 arr_time=row['arrival_time'],
                 dep_time=row['departure_time'],
                 seq=int(row['stop_sequence']),
             )
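Example #7 guards the required stop_times fields with assert, which is stripped when Python runs with -O. A hedged alternative that keeps the checks in optimized runs (the helper name and message wording are illustrative, not the library's own):

REQUIRED_STOP_TIMES_FIELDS = (
    'arrival_time', 'departure_time', 'stop_sequence', 'stop_id', 'trip_id')

def check_required_fields(row):
    # Raise instead of assert so the validation survives python -O.
    for field in REQUIRED_STOP_TIMES_FIELDS:
        if row[field] == "":
            raise ValueError(
                "Some stop_times entries are missing %s information." % field)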
Example #8
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             # and transform the "row" dictionary into a new
             # dictionary, which is yielded.  There can be different
             # transformations here, as needed.
             yield dict(
                 stop_id=prefix + decode_six(row['stop_id']),
                 code=decode_six(row['stop_code'])
                 if 'stop_code' in row else None,
                 name=decode_six(row['stop_name']),
                 desc=decode_six(row['stop_desc'])
                 if 'stop_desc' in row else None,
                 lat=float(row['stop_lat']),
                 lon=float(row['stop_lon']),
                 _parent_id=prefix + decode_six(row['parent_station'])
                 if row.get('parent_station', '') else None,
                 location_type=int(row['location_type'])
                 if row.get('location_type') else None,
                 wheelchair_boarding=int(row['wheelchair_boarding'])
                 if row.get('wheelchair_boarding', '') else None,
             )
Example #9
 def gen_rows(self, readers, prefixes):
     from gtfspy import extended_route_types
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(
                 route_id      = prefix + decode_six(row['route_id']),
                 _agency_id    = prefix + decode_six(row['agency_id']) if 'agency_id' in row else None,
                 name          = decode_six(row['route_short_name']),
                 long_name     = decode_six(row['route_long_name']),
                 desc          = decode_six(row['route_desc']) if 'route_desc' in row else None,
                 type          = extended_route_types.ROUTE_TYPE_CONVERSION[int(row['route_type'])],
                 url           = decode_six(row['route_url']) if 'route_url' in row else None,
                 color         = decode_six(row['route_color']) if 'route_color' in row else None,
                 text_color    = decode_six(row['route_text_color']) if 'route_text_color' in row else None,
             )
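ROUTE_TYPE_CONVERSION in gtfspy.extended_route_types folds Google's extended route-type codes down to the basic GTFS types. The full table is not reproduced on this page; a few illustrative entries, assuming the usual extended-spec groupings (the real mapping lives in gtfspy/extended_route_types.py):

# Illustrative subset only; the basic types map to themselves and each
# extended hundreds-block collapses to the nearest basic type.
ROUTE_TYPE_CONVERSION = {
    0: 0,    # tram
    1: 1,    # subway / metro
    2: 2,    # rail
    3: 3,    # bus
    100: 2,  # extended "Railway Service" -> rail
    700: 3,  # extended "Bus Service" -> bus
    900: 0,  # extended "Tram Service" -> tram
}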
Example #10
    def gen_rows(self, readers, prefixes):

        for reader, prefix in zip(readers, prefixes):
            for row in reader:
                yield dict(
                    agency_id=prefix + decode_six(row.get('agency_id', '1')),
                    name=decode_six(row['agency_name']),
                    timezone=decode_six(row['agency_timezone']),
                    url=decode_six(row['agency_url']),
                    lang=decode_six(row['agency_lang'])
                    if 'agency_lang' in row else None,
                    phone=decode_six(row['agency_phone'])
                    if 'agency_phone' in row else None,
                )
Example #11
 def gen_rows(self, readers, prefixes):
     for reader, prefix in zip(readers, prefixes):
         for row in reader:
             yield dict(
                 _route_id=prefix + decode_six(row['route_id']),
                 _service_id=prefix + decode_six(row['service_id']),
                 trip_id=prefix + decode_six(row['trip_id']),
                 direction_id=decode_six(row['direction_id'])
                 if row.get('direction_id', '') else None,
                 shape_id=prefix + decode_six(row['shape_id'])
                 if row.get('shape_id', '') else None,
                 headsign=decode_six(row['trip_headsign'])
                 if 'trip_headsign' in row else None,
             )
Example #12
def import_gtfs(gtfs_sources,
                output,
                preserve_connection=False,
                print_progress=True,
                location_name=None,
                **kwargs):
    """Import a GTFS database

    gtfs_sources: str, dict, list
        Paths to the gtfs zip file or to the directory containing the GTFS data.
        Alternatively, a dict can be provide that maps gtfs filenames
        (like 'stops.txt' and 'agencies.txt') to their string presentations.

    output: str or sqlite3.Connection
        path to the new database to be created, or an existing
        sqlite3 connection
    preserve_connection: bool, optional
        Whether to close the connection in the end, or not.
    print_progress: bool, optional
        Whether to print progress output
    location_name: str, optional
        set the location of this database
    """
    if isinstance(output, sqlite3.Connection):
        conn = output
    else:
        # if os.path.isfile(output):
        #  raise RuntimeError('File already exists')
        conn = sqlite3.connect(output)
    if not isinstance(gtfs_sources, list):
        gtfs_sources = [gtfs_sources]
    cur = conn.cursor()
    time_import_start = time.time()

    # These pragmas are a bit unsafe, but they make importing much
    # faster, especially on scratch disks.
    cur.execute('PRAGMA page_size = 4096;')
    cur.execute('PRAGMA mmap_size = 1073741824;')
    cur.execute('PRAGMA cache_size = -2000000;')
    cur.execute('PRAGMA temp_store=2;')
    # The isolation-level changes below are Python 3.6 workarounds;
    # eventually this will probably be fixed upstream and can be removed.
    conn.isolation_level = None  # change to autocommit mode (former default)
    cur.execute('PRAGMA journal_mode = OFF;')
    #cur.execute('PRAGMA journal_mode = WAL;')
    cur.execute('PRAGMA synchronous = OFF;')
    conn.isolation_level = ''  # change back to python default.
    # end python3.6 workaround

    # Do the actual importing.
    loaders = [
        L(gtfssource=gtfs_sources, print_progress=print_progress, **kwargs)
        for L in Loaders
    ]

    for loader in loaders:
        loader.assert_exists_if_required()

    # Do initial import.  This consists of making tables, raw insert
    # of the CSVs, and then indexing.

    for loader in loaders:
        loader.import_(conn)

    # Do any operations that require all tables present.
    for loader in loaders:
        loader.post_import_round2(conn)

    # Make any views.
    for loader in loaders:
        loader.make_views(conn)

    # Run any postprocessors.
    for F in postprocessors:
        F(conn)

    # Set up some basic metadata.
    from gtfspy import gtfs as mod_gtfs
    G = mod_gtfs.GTFS(output)
    G.meta['gen_time_ut'] = time.time()
    G.meta['gen_time'] = time.ctime()
    G.meta['import_seconds'] = time.time() - time_import_start
    G.meta['download_date'] = ''
    G.meta['location_name'] = ''
    G.meta['n_gtfs_sources'] = len(gtfs_sources)

    # Extract things from GTFS
    download_date_strs = []
    for i, source in enumerate(gtfs_sources):
        if len(gtfs_sources) == 1:
            prefix = ""
        else:
            prefix = "feed_" + str(i) + "_"
        if isinstance(source, string_types):
            G.meta[prefix +
                   'original_gtfs'] = decode_six(source) if source else None
            # Extract GTFS date.  Last date pattern in filename.
            filename_date_list = re.findall(r'\d{4}-\d{2}-\d{2}', source)
            if filename_date_list:
                date_str = filename_date_list[-1]
                G.meta[prefix + 'download_date'] = date_str
                download_date_strs.append(date_str)
            if location_name:
                G.meta['location_name'] = location_name
            else:
                location_name_list = re.findall(r'/([^/]+)/\d{4}-\d{2}-\d{2}',
                                                source)
                if location_name_list:
                    G.meta[prefix + 'location_name'] = location_name_list[-1]
                else:
                    try:
                        G.meta[prefix +
                               'location_name'] = source.split("/")[-4]
                    except Exception:
                        G.meta[prefix + 'location_name'] = source

    if G.meta['download_date'] == "":
        unique_download_dates = list(set(download_date_strs))
        if len(unique_download_dates) == 1:
            G.meta['download_date'] = unique_download_dates[0]

    G.meta['timezone'] = cur.execute(
        'SELECT timezone FROM agencies LIMIT 1').fetchone()[0]
    stats.update_stats(G)
    del G

    if print_progress:
        print("Vacuuming...")
    # The next three lines are Python 3.6 workarounds again.
    conn.isolation_level = None  # former default of autocommit mode
    cur.execute('VACUUM;')
    conn.isolation_level = ''  # back to python default
    # end python3.6 workaround
    if print_progress:
        print("Analyzing...")
    cur.execute('ANALYZE')
    if not preserve_connection:
        conn.close()
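Example #12 is the top-level entry point that wires all of the loaders above together. A typical call, assuming a single GTFS zip on disk (the paths and location name are placeholders):

from gtfspy.import_gtfs import import_gtfs

# Import one feed into a fresh sqlite database.  With a single source
# the loaders use an empty prefix, so ids are stored unprefixed.
import_gtfs('/data/helsinki/2016-01-01/gtfs.zip',
            'helsinki.sqlite',
            location_name='helsinki')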