def regenerate_dat(input_tuple, stage):
  """Regenerates the original .dat text from a conformer and compares them.

  Args:
    input_tuple: tuple of (original .dat contents string,
      dataset_pb2.Conformer)
    stage: string, either 'stage1' or 'stage2'

  Returns:
    Tuple of (original dat, conformer, regenerated dat, match flag), where
    the match flag is 1 when the formats match and 0 on a mismatch.
  """
  original_dat, conformer = input_tuple
  writer = smu_writer_lib.SmuWriter(annotate=False)
  regenerate_fn = (
      writer.process_stage1_proto
      if stage == 'stage1' else writer.process_stage2_proto)
  regen_dat = regenerate_fn(conformer)
  try:
    smu_writer_lib.check_dat_formats_match(original_dat.splitlines(),
                                           regen_dat.splitlines())
  except smu_writer_lib.DatFormatMismatchError:
    beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                 stage + '_dat_format_mismatched').inc()
    return original_dat, conformer, regen_dat, 0
  beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                               stage + '_dat_format_matched').inc()
  return original_dat, conformer, regen_dat, 1
def test_roundtrip(self):
  """Round-trips every stage2 record from .dat to proto and back."""
  writer = smu_writer_lib.SmuWriter(annotate=False)
  for conformer, orig_contents in self.parser.process_stage2():
    regenerated = writer.process_stage2_proto(conformer)
    smu_writer_lib.check_dat_formats_match(orig_contents,
                                           regenerated.splitlines())
def test_simple(self):
  """Checks AtomicInputWriter output against the golden atomic input file."""
  dat_path = os.path.join(TESTDATA_PATH, MAIN_DAT_FILE)
  parser = smu_parser_lib.SmuParser(dat_path)
  conformer, _ = next(parser.process_stage2())
  writer = smu_writer_lib.AtomicInputWriter()
  golden = get_file_contents(os.path.join(TESTDATA_PATH, ATOMIC_INPUT))
  smu_writer_lib.check_dat_formats_match(golden,
                                         writer.process(conformer).splitlines())
def main(argv):
  """Compares regenerated atomic input files against golden copies.

  For every conformer in every file matching FLAGS.input_glob, regenerates
  the atomic input text, compares it with the expected file under
  FLAGS.atomic_input_dir, and (optionally) writes the regenerated text to
  FLAGS.output_dir. Prints a summary of mismatches at the end.

  Args:
    argv: command-line arguments; only the program name is allowed.

  Raises:
    app.UsageError: if extra command-line arguments are given.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  atomic_writer = smu_writer_lib.AtomicInputWriter()
  file_count = 0
  conformer_count = 0
  mismatches = 0

  for filepath in gfile.glob(FLAGS.input_glob):
    logging.info('Processing file %s', filepath)
    file_count += 1
    smu_parser = smu_parser_lib.SmuParser(filepath)
    for conformer, _ in smu_parser.process_stage2():
      conformer_count += 1
      actual_contents = atomic_writer.process(conformer)
      expected_fn = atomic_writer.get_filename_for_atomic_input(conformer)
      expected_path = os.path.join(FLAGS.atomic_input_dir, expected_fn)
      # NOTE(review): readlines() keeps trailing newlines while splitlines()
      # strips them — presumably check_dat_formats_match normalizes; confirm.
      with gfile.GFile(expected_path) as expected_f:
        expected_contents = expected_f.readlines()
      try:
        smu_writer_lib.check_dat_formats_match(expected_contents,
                                               actual_contents.splitlines())
      except smu_writer_lib.DatFormatMismatchError as e:
        mismatches += 1
        print(e)
      if FLAGS.output_dir:
        out_path = os.path.join(FLAGS.output_dir, expected_fn)
        with gfile.GFile(out_path, 'w') as f:
          f.write(actual_contents)

  status_str = ('COMPLETE: Read %d files, %d conformers, %d mismatches\n' %
                (file_count, conformer_count, mismatches))
  logging.info(status_str)
  print(status_str)
def test_roundtrip_tweaked_bt(self):
  """Round-trips stage2 records after perturbing the bond topologies.

  The .dat format should only ever use the starting topology, so this adds
  some wrong bond topologies to make sure they are ignored by the writer.
  """
  writer = smu_writer_lib.SmuWriter(annotate=False)
  for molecule, orig_contents in self.parser.process_stage2():
    # Duplicate the first topology twice, then corrupt the first two
    # entries; the regenerated .dat output must be unaffected.
    molecule.bond_topologies.append(molecule.bond_topologies[0])
    molecule.bond_topologies.append(molecule.bond_topologies[0])
    molecule.bond_topologies[0].source = dataset_pb2.BondTopology.SOURCE_ITC
    molecule.bond_topologies[1].source = dataset_pb2.BondTopology.SOURCE_CSD
    for bt in molecule.bond_topologies[:2]:
      bt.bonds[0].bond_type = dataset_pb2.BondTopology.BOND_TRIPLE
      bt.bond_topology_id += 9999
    smu_writer_lib.check_dat_formats_match(
        orig_contents,
        writer.process_stage2_proto(molecule).splitlines())
def try_roundtrip(self, filename, stage):
  """Parses a test .dat file and round-trips every record through the writer.

  Args:
    filename: name of a file under TESTDATA_PATH
    stage: string, either 'stage1' or 'stage2'

  Raises:
    ValueError: if stage is not one of the two known stage names.
  """
  parser = smu_parser_lib.SmuParser(os.path.join(TESTDATA_PATH, filename))
  writer = smu_writer_lib.SmuWriter(annotate=False)
  # Dispatch table: stage name -> (parse generator, proto-to-dat function).
  stage_fns = {
      'stage1': (parser.process_stage1, writer.process_stage1_proto),
      'stage2': (parser.process_stage2, writer.process_stage2_proto),
  }
  if stage not in stage_fns:
    raise ValueError(stage)
  process_fn, writer_fn = stage_fns[stage]
  for maybe_conformer, orig_contents in process_fn():
    # The parser yields exceptions in place of conformers on parse errors.
    if isinstance(maybe_conformer, Exception):
      raise maybe_conformer
    self.assertGreater(maybe_conformer.bond_topologies[0].bond_topology_id, 0)
    smu_writer_lib.check_dat_formats_match(
        orig_contents,
        writer_fn(maybe_conformer).splitlines())
def main(argv):
  """Round-trips SMU .dat files through the parser/writer and buckets results.

  Reads every file matching FLAGS.input_glob, parses each conformer at
  FLAGS.stage, regenerates the .dat text, and classifies each conformer into
  an Outcome. For each outcome, the original and regenerated text are
  appended to a pair of files rooted at FLAGS.output_stem, and a summary is
  logged and printed at the end.

  Args:
    argv: command-line arguments; only the program name is allowed.

  Raises:
    app.UsageError: if extra command-line arguments are given.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  smu_writer = smu_writer_lib.SmuWriter(annotate=False)

  # Maps each Outcome to a pair of (original, regenerated) file handles.
  # The filename component is derived from the enum member name, e.g.
  # Outcome.PARSE_ERROR_KNOWN -> '<stem>_parse_error_known_original.dat'.
  # (This replaces four copy-pasted, near-identical GFile pairs.)
  output_files = {
      outcome:
      (gfile.GFile(
          '%s_%s_original.dat' % (FLAGS.output_stem, outcome.name.lower()),
          'w'),
       gfile.GFile(
           '%s_%s_regen.dat' % (FLAGS.output_stem, outcome.name.lower()),
           'w'))
      for outcome in (Outcome.SUCCESS, Outcome.MISMATCH,
                      Outcome.PARSE_ERROR_KNOWN, Outcome.PARSE_ERROR_UNKNOWN)
  }

  file_count = 0
  conformer_count = 0
  outcome_counts = collections.Counter()

  for filepath in gfile.glob(FLAGS.input_glob):
    logging.info('Processing file %s', filepath)
    file_count += 1
    smu_parser = smu_parser_lib.SmuParser(filepath)
    if FLAGS.stage == 'stage1':
      process_fn = smu_parser.process_stage1
    else:
      process_fn = smu_parser.process_stage2
    for conformer, orig_contents_list in process_fn():
      conformer_count += 1
      # The parser yields exceptions in place of conformers on parse errors.
      if isinstance(conformer, Exception):
        if isinstance(conformer, smu_parser_lib.SmuKnownError):
          outcome = Outcome.PARSE_ERROR_KNOWN
        else:
          outcome = Outcome.PARSE_ERROR_UNKNOWN
        regen_contents = '{}\n{}: {} {}\n'.format(
            smu_parser_lib.SEPARATOR_LINE, conformer.conformer_id,
            type(conformer).__name__, str(conformer))
      else:
        if FLAGS.stage == 'stage1':
          regen_contents = smu_writer.process_stage1_proto(conformer)
        else:
          regen_contents = smu_writer.process_stage2_proto(conformer)
        try:
          smu_writer_lib.check_dat_formats_match(orig_contents_list,
                                                 regen_contents.splitlines())
          outcome = Outcome.SUCCESS
        except smu_writer_lib.DatFormatMismatchError as e:
          outcome = Outcome.MISMATCH
          print(e)
      outcome_counts[outcome] += 1
      output_files[outcome][0].write('\n'.join(orig_contents_list) + '\n')
      output_files[outcome][1].write(regen_contents)

  for file_orig, file_regen in output_files.values():
    file_orig.close()
    file_regen.close()

  def outcome_status(outcome):
    # Guard against division by zero when no conformers were read.
    if conformer_count:
      percent = outcome_counts[outcome] / conformer_count * 100
    else:
      percent = float('nan')
    return '%5.1f%% %7d %s \n' % (percent, outcome_counts[outcome],
                                  str(outcome))

  status_str = ('COMPLETE: Read %d files, %d conformers\n' %
                (file_count, conformer_count) +
                outcome_status(Outcome.SUCCESS) +
                outcome_status(Outcome.PARSE_ERROR_KNOWN) +
                outcome_status(Outcome.MISMATCH) +
                outcome_status(Outcome.PARSE_ERROR_UNKNOWN))
  logging.info(status_str)
  print(status_str)