示例#1
0
def test_tfl_data():
    """Return the result of ``get_path`` for the 'test_tfl_format' key."""
    dataset_key = 'test_tfl_format'
    return get_path(dataset_key)
示例#2
0
def test_data():
    """Return the result of ``get_path`` for the 'test_data_dir' key."""
    dataset_key = 'test_data_dir'
    return get_path(dataset_key)
示例#3
0
def convert(data_dir, output_filepath, append_to_existing=False):
    """
    Converts TransXchange formatted schedule data into GTFS feed.

    data_dir : str
        Data directory containing one or multiple TransXchange .xml files.
        Only files matching ``*.xml`` directly inside this directory are
        collected (the glob is not recursive).
    output_filepath : str
        Full filepath to the output GTFS zip-file, e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to existing gtfs-database. This might be useful if you have
        TransXchange .xml files distributed into multiple directories (e.g. separate files for
        train data, tube data and bus data) and you want to merge all those datasets into a single
        GTFS feed.

    Notes
    -----
    An intermediate database file named ``gtfs.db`` is placed in the same
    directory as ``output_filepath``. Unless ``append_to_existing`` is truthy,
    a pre-existing ``gtfs.db`` in that directory is deleted before processing.
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db (lives next to the output zip)
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # Start from a clean database unless the caller asked to append
    if not append_to_existing and os.path.exists(gtfs_db):
        os.remove(gtfs_db)

    # NAPTAN stops
    naptan_stops_fp = get_path("naptan_stops")

    # Retrieve all TransXChange files
    files = glob.glob(os.path.join(data_dir, "*.xml"))

    # Iterate over files
    print("Populating database ..")

    # Limit the processed files by file size (in MB)
    # Files with lower filesize than below will be processed
    file_size_limit = 1000

    # Create workers
    workers = create_workers(input_files=files,
                             file_size_limit=file_size_limit,
                             stops_fp=naptan_stops_fp,
                             gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()
    try:
        # Generate GTFS info to the database in parallel
        pool.map(process_files, workers)
    finally:
        # Always release the worker processes, even if a task raised;
        # otherwise they linger until the interpreter exits.
        pool.close()
        pool.join()

    # Print information about the total time
    # (measured before the export step below, as in the original flow)
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
示例#4
0
def test_txc21_data():
    """Return the result of ``get_path`` for the 'test_txc21_format' key."""
    dataset_key = 'test_txc21_format'
    return get_path(dataset_key)
示例#5
0
def unpacked_data():
    """Return the result of ``get_path`` for the 'test_data_dir' key."""
    dataset_key = 'test_data_dir'
    return get_path(dataset_key)
示例#6
0
def dir_with_packed_data():
    """Return the result of ``get_path`` for the 'test_dir_with_packed_data' key."""
    dataset_key = 'test_dir_with_packed_data'
    return get_path(dataset_key)
示例#7
0
def nested_data():
    """Return the result of ``get_path`` for the 'test_nested_packed_data' key."""
    dataset_key = 'test_nested_packed_data'
    return get_path(dataset_key)
示例#8
0
def packed_data():
    """Return the result of ``get_path`` for the 'test_packed_data' key."""
    dataset_key = 'test_packed_data'
    return get_path(dataset_key)
示例#9
0
def test_naptan_data():
    """Return the result of ``get_path`` for the 'naptan_stops' key."""
    dataset_key = 'naptan_stops'
    return get_path(dataset_key)