def test_metadata_correctness(self):
        """Metadata covers each (xid, model_num, conversion_type) combination once."""
        metadata = model_conversion_beam_utils.get_pipeline_metadata(
            self.base_experiment_dir, self.xids, self.output_dir,
            self.conversion_types, 'suffix')

        # One metadata entry per combination of xid, folder, and conversion type.
        expected_total = (
            len(self.xids) * len(self.folders) * len(self.conversion_types))
        self.assertLen(metadata, expected_total)

        # Tally how often each field value appears across the metadata.
        xid_counts = dict.fromkeys(self.xids, 0)
        model_num_counts = dict.fromkeys(range(len(self.folders)), 0)
        conversion_type_counts = dict.fromkeys(self.conversion_types, 0)
        for entry in metadata:
            xid_counts[entry.xid] += 1
            model_num_counts[entry.model_num] += 1
            conversion_type_counts[entry.conversion_type] += 1

        for xid in self.xids:
            self.assertEqual(xid_counts[xid], 6)
        for model_num in range(3):
            self.assertEqual(model_num_counts[model_num], 6)
        for ctype in conversion_type_counts:
            self.assertEqual(conversion_type_counts[ctype], 9)

        # Every metadata entry must target a distinct output file.
        output_filenames = [entry.output_filename for entry in metadata]
        self.assertEqual(len(set(output_filenames)),
                         len(output_filenames))
# Example no. 2 (scraped-snippet separator; original residue: "Exemplo n.º 2" / "0")
def main(unused_argv):
    """Converts every model described by the flags and writes it to disk via Beam."""
    beam_options = None

    # Gather the metadata describing each model conversion to perform.
    metadata = utils.get_pipeline_metadata(FLAGS.base_experiment_dir,
                                           FLAGS.xids, FLAGS.output_dir,
                                           FLAGS.conversion_types,
                                           FLAGS.output_suffix)
    if not metadata:
        raise ValueError(
            f'No data found: {FLAGS.base_experiment_dir}, {FLAGS.xids}')
    logging.info('%i models in %i xids.', len(metadata), len(FLAGS.xids))

    # Fail fast if outputs already exist; creates directories as needed.
    for model_metadata in metadata:
        utils.sanity_check_output_filename(model_metadata.output_filename)

    logging.info('Starting to create flume pipeline...')

    def _convert_and_write_model(model_metadata):
        # Converts a single model; returns the metadata so the stage has output.
        utils.convert_and_write_model(
            model_metadata,
            include_frontend=FLAGS.include_frontend,
            sanity_check=FLAGS.sanity_check)
        return model_metadata

    # Build and run the Beam pipeline: one conversion per metadata entry.
    with beam.Pipeline(beam_options) as root:
        _ = (root
             | 'MakeMetadataCollection' >> beam.Create(metadata)
             | 'ConvertAndWriteModelsToDisk' >>
             beam.Map(_convert_and_write_model))
    def test_metadata_sanity(self):
        """Every generated output filename passes the sanity check."""
        metadata = model_conversion_beam_utils.get_pipeline_metadata(
            self.base_experiment_dir, self.xids, self.output_dir,
            self.conversion_types)

        for entry in metadata:
            model_conversion_beam_utils.sanity_check_output_filename(
                entry.output_filename)