Python BinaryConverter.add_csv示例，maro.data_lib.BinaryConverter.add_csv Python示例

示例#1

0

显示文件

文件： test_data_collection_load.py 项目： you-n-g/maro

    def test_load_correct(self):
        """Dump generated CIM data, convert its stops to binary, reload and compare.

        A data collection is generated from the dumped ``config.yml``, written
        to a temporary folder with ``CimDataDumpUtil``, and the ``stops.csv``
        in that folder is converted into ``stops.bin``. The folder is then
        loaded back with ``load_from_folder`` and every part of the reloaded
        collection is compared against the generated one.
        """
        # Local import: only needed for the temporary-folder cleanup below.
        import shutil

        config_path = os.path.join("tests", "data", "cim", "data_generator",
                                   "dumps", "config.yml")
        stops_meta_path = os.path.join("tests", "data", "cim",
                                       "data_generator", "dumps",
                                       "cim.stops.meta.yml")

        output_folder = tempfile.mkdtemp()

        try:
            # here we need to use CimDataDumpUtil manually to compare the result
            dc: CimDataCollection = CimDataGenerator().gen_data(config_path, 20)

            dumper = CimDataDumpUtil(dc)

            dumper.dump(output_folder)

            # convert stops.csv into binary
            bconverter = BinaryConverter(
                os.path.join(output_folder, "stops.bin"), stops_meta_path)

            bconverter.add_csv(os.path.join(output_folder, "stops.csv"))
            bconverter.flush()

            # then load it
            dc2 = load_from_folder(output_folder)

            # compare
            self._compare_ports(dc, dc2)
            self._compare_vessels(dc, dc2)
            self._compare_stops(dc, dc2)
            self._compare_routes(dc, dc2)
            self._compare_misc(dc, dc2)
            self._compare_order_proportion(dc, dc2)
        finally:
            # mkdtemp() leaves deletion to the caller. Cleanup is best-effort
            # so that a file still held open cannot turn a passing test into
            # a failure.
            shutil.rmtree(output_folder, ignore_errors=True)

示例#2

0

显示文件

    def test_convert_without_events(self):
        """Convert ``trips.csv`` with the ``case_2`` meta file and check event metadata.

        After conversion the binary file is opened with ``BinaryReader`` and
        the test checks the event display names, the event type names, the
        default event name, and that no event attribute name is set.
        """
        # Local import: only needed for the temporary-folder cleanup below.
        import shutil

        out_dir = tempfile.mkdtemp()

        try:
            out_bin = os.path.join(out_dir, "trips.bin")

            meta_file = os.path.join(
                "tests", "data", "data_lib", "case_2", "meta.yml")
            csv_file = os.path.join("tests", "data", "data_lib", "trips.csv")

            bct = BinaryConverter(out_bin, meta_file)

            bct.add_csv(csv_file)

            # flush will close the file, cannot add again
            bct.flush()

            reader = BinaryReader(out_bin)

            meta: BinaryMeta = reader.meta

            self.assertIsNotNone(meta)

            # check events
            self.assertListEqual(
                ["require_bike", "return_bike", "rebalance_bike", "deliver_bike"],
                [event.display_name for event in meta.events])

            self.assertListEqual(
                ["RequireBike", "ReturnBike", "RebalanceBike", "DeliverBike"],
                [event.type_name for event in meta.events])

            self.assertEqual("RequireBike", meta.default_event_name)
            self.assertIsNone(meta.event_attr_name)
        finally:
            # mkdtemp() leaves deletion to the caller. Cleanup is best-effort
            # because the reader may still hold the binary file open.
            shutil.rmtree(out_dir, ignore_errors=True)

示例#3

0

显示文件

    def test_convert_with_starttimestamp(self):
        """Convert ``trips.csv`` with an explicit UTC start timestamp and query by offset.

        The converter is given ``utc_start_timestamp=1546300740``. The test
        checks that the reader's header reports the same start time, and that
        querying with ``end_time_offset`` 0 and 1 (in minutes) yields 0 and 1
        items respectively.
        """
        # Local import: only needed for the temporary-folder cleanup below.
        import shutil

        out_dir = tempfile.mkdtemp()

        try:
            out_bin = os.path.join(out_dir, "trips.bin")

            meta_file = os.path.join(
                "tests", "data", "data_lib", "case_2", "meta.yml")
            csv_file = os.path.join("tests", "data", "data_lib", "trips.csv")

            # 12/31/2018 @ 11:59pm (UTC)
            bct = BinaryConverter(
                out_bin, meta_file, utc_start_timestamp=1546300740)

            bct.add_csv(csv_file)

            # flush will close the file, cannot add again
            bct.flush()

            reader = BinaryReader(out_bin)

            # check header
            self.assertEqual(1546300740, reader.header.starttime)

            # then tick 0 will not be 2019/01/01 00:00:00
            item_count = len(
                list(reader.items(end_time_offset=0, time_unit='m')))

            self.assertEqual(0, item_count)

            # it should be tick 1 for now
            item_count = len(
                list(reader.items(end_time_offset=1, time_unit='m')))

            self.assertEqual(1, item_count)
        finally:
            # mkdtemp() leaves deletion to the caller. Cleanup is best-effort
            # because the reader may still hold the binary file open.
            shutil.rmtree(out_dir, ignore_errors=True)

示例#4

0

显示文件

def setup_case(case_name: str, max_tick: int):
    """Prepare binary inputs for a Citi Bike case and build its business engine.

    For the given case folder under ``tests/data/citi_bike``, the trips and
    weather CSV files are converted to binary if the corresponding ``.bin``
    file does not exist yet. An ``EventBuffer`` and a
    ``CitibikeBusinessEngine`` using the case folder as topology are then
    created.

    Args:
        case_name: Name of the case folder under ``tests/data/citi_bike``.
        max_tick: Value passed to the business engine as ``max_tick``.

    Returns:
        A ``(event_buffer, business_engine)`` tuple.
    """
    data_root = "tests/data/citi_bike"
    config_path = os.path.join(data_root, case_name)

    # Each entry: (binary output, meta file, csv source). Trips are per case,
    # weather data is shared by all cases.
    conversions = (
        (
            os.path.join(config_path, "trips.bin"),
            os.path.join(data_root, "trips.meta.yml"),
            os.path.join(config_path, "trips.csv"),
        ),
        (
            os.path.join(data_root, "weathers.bin"),
            os.path.join(data_root, "weather.meta.yml"),
            os.path.join(data_root, "weather.csv"),
        ),
    )

    for bin_path, meta_path, csv_path in conversions:
        # An existing binary is reused as-is.
        if os.path.exists(bin_path):
            continue

        converter = BinaryConverter(bin_path, meta_path)
        converter.add_csv(csv_path)
        converter.flush()

    event_buffer = EventBuffer()
    business_engine = CitibikeBusinessEngine(
        event_buffer=event_buffer,
        topology=config_path,
        start_tick=0,
        max_tick=max_tick,
        snapshot_resolution=1,
        max_snapshots=None,
        additional_options={},
    )

    return event_buffer, business_engine

示例#5

0

显示文件

    def test_convert_with_events(self):
        """Convert ``trips.csv`` twice with the ``case_1`` meta file and read it back.

        The same CSV is added to the converter two times before flushing. The
        test then checks that the output exists, the reader's start and end
        datetimes, the header's item count and item size, the fields and
        source station of every item, and the item counts returned when
        filtering by time offset.
        """
        # Local import: only needed for the temporary-folder cleanup below.
        import shutil

        out_dir = tempfile.mkdtemp()

        try:
            out_bin = os.path.join(out_dir, "trips.bin")

            meta_file = os.path.join(
                "tests", "data", "data_lib", "case_1", "meta.yml")
            csv_file = os.path.join("tests", "data", "data_lib", "trips.csv")

            bct = BinaryConverter(out_bin, meta_file)

            # add and convert 1st csv file
            bct.add_csv(csv_file)

            # adding again appends to the end, ignoring the order
            bct.add_csv(csv_file)

            # flush will close the file, cannot add again
            bct.flush()

            # check if output exists
            self.assertTrue(os.path.exists(out_bin))

            # check content
            reader = BinaryReader(out_bin)

            # start tick should be the smallest one
            start_date = reader.start_datetime

            self.assertEqual(start_date.year, 2019)
            self.assertEqual(start_date.month, 1)
            self.assertEqual(start_date.day, 1)
            self.assertEqual(start_date.hour, 0)
            self.assertEqual(start_date.minute, 0)
            self.assertEqual(start_date.second, 0)

            end_date = reader.end_datetime

            self.assertEqual(end_date.year, 2019)
            self.assertEqual(end_date.month, 1)
            self.assertEqual(end_date.day, 1)
            self.assertEqual(end_date.hour, 0)
            self.assertEqual(end_date.minute, 5)
            self.assertEqual(end_date.second, 0)

            # there should be twice as many items as in trips.csv
            self.assertEqual(4 * 2, reader.header.item_count)

            # each item is expected to take 20 bytes
            self.assertEqual(20, reader.header.item_size)

            # The CSV was added twice, so the expected source stations repeat;
            # the modulo below wraps the index around this list.
            start_station_index = [0, 0, 1, 0]

            # check iterating interface
            for idx, item in enumerate(reader.items()):
                # check if fields are the same as in meta
                self.assertTupleEqual(
                    ('timestamp', 'durations', 'src_station', 'dest_station'),
                    item._fields)

                # check item start station index
                self.assertEqual(
                    start_station_index[idx % len(start_station_index)],
                    item.src_station)

            # check if filter works as expected
            item_count = len(
                list(reader.items(end_time_offset=0, time_unit="m")))

            # although 2 items match the condition, the items are not sorted:
            # the reader does not read to the end, it stops at the first item
            # that does not match the condition
            self.assertEqual(1, item_count)

            item_count = len(
                list(reader.items(start_time_offset=1, time_unit='m')))

            # reader will try to read 1st one that > end tick, so there should
            # be 6 items
            self.assertEqual(6, item_count)
        finally:
            # mkdtemp() leaves deletion to the caller. Cleanup is best-effort
            # because the reader may still hold the binary file open.
            shutil.rmtree(out_dir, ignore_errors=True)