Пример #1
0
  def test_large(self):
    """Run parse, write, merge, split and rebase over the 'large.zip' analyses via self._time.

    Nothing is asserted about the results: each step only has to complete without raising. Every
    step is handed to self._time together with a label naming the step and the input files, and
    self.total_time is printed at the end.
    """
    zinc_parser = ZincAnalysisParser()

    with _temp_test_dir('large.zip') as testdir:
      print('Operating in test dir: {}'.format(testdir))

      # The two analysis files this test operates on, resolved against the test dir.
      analysis_file_names = [b'downstream.analysis', b'upstream.analysis']
      input_paths = [os.path.join(testdir, name) for name in analysis_file_names]

      def label(step):
        # A label is the step name followed by the bracketed, comma-separated input file names.
        joined_names = ', '.join(analysis_file_names)
        return '{0} [{1}]'.format(step, joined_names)

      # Step 1: parse every input file.
      def parse_inputs():
        return [zinc_parser.parse_from_path(path) for path in input_paths]

      parsed = self._time(parse_inputs, label('Parsed'))

      # Step 2: write each parsed analysis back out, under its original base name.
      writeout_dir = os.path.join(testdir, b'write')
      os.mkdir(writeout_dir)

      def rewrite_inputs():
        for path, analysis in zip(input_paths, parsed):
          destination = os.path.join(writeout_dir, os.path.basename(path))
          analysis.write_to_path(destination)

      self._time(rewrite_inputs, label('Wrote'))

      # Step 3: merge the parsed analyses into one.
      def merge_parsed():
        return ZincAnalysis.merge(parsed)

      merged = self._time(merge_parsed, label('Merged'))

      # Step 4: persist the merged analysis.
      merged_path = os.path.join(testdir, b'merged.analysis')

      def write_merged():
        return merged.write_to_path(merged_path)

      self._time(write_merged, label('Wrote merge of'))

      # Step 5: split the merged analysis along the source sets of the parsed analyses.
      sources_per_analysis = [item.stamps.sources.keys() for item in parsed]

      def split_merged():
        return merged.split(sources_per_analysis, catchall=True)

      self._time(split_merged, label('Split'))

      # Step 6: rebase the merged analysis file, using a fresh parser instance.
      rebased_path = os.path.join(testdir, b'rebased.merged.analysis')

      def rebase_merged():
        return ZincAnalysisParser().rebase_from_path(
          merged_path, rebased_path, b'/Users/kermit/src/acme.web', b'$PANTS_HOME')

      self._time(rebase_merged, label('Rebase'))

    print('Total time: %f seconds' % self.total_time)
Пример #2
0
  def test_large(self):
    """Run parse, write, merge, split and rebase over the 'large.zip' analyses via self._time.

    Nothing is asserted about the results: each step only has to complete without raising. Every
    step is handed to self._time together with a label naming the step and the input files, and
    self.total_time is printed at the end.
    """
    zinc_parser = ZincAnalysisParser()

    with _temp_test_dir('large.zip') as testdir:
      print('Operating in test dir: {}'.format(testdir))

      # The two analysis files this test operates on, resolved against the test dir.
      analysis_file_names = [b'downstream.analysis', b'upstream.analysis']
      input_paths = [os.path.join(testdir, name) for name in analysis_file_names]

      def label(step):
        # A label is the step name followed by the bracketed, comma-separated input file names.
        joined_names = ', '.join(analysis_file_names)
        return '{0} [{1}]'.format(step, joined_names)

      # Step 1: parse every input file.
      def parse_inputs():
        return [zinc_parser.parse_from_path(path) for path in input_paths]

      parsed = self._time(parse_inputs, label('Parsed'))

      # Step 2: write each parsed analysis back out, under its original base name.
      writeout_dir = os.path.join(testdir, b'write')
      os.mkdir(writeout_dir)

      def rewrite_inputs():
        for path, analysis in zip(input_paths, parsed):
          destination = os.path.join(writeout_dir, os.path.basename(path))
          analysis.write_to_path(destination)

      self._time(rewrite_inputs, label('Wrote'))

      # Step 3: merge the parsed analyses into one.
      def merge_parsed():
        return ZincAnalysis.merge(parsed)

      merged = self._time(merge_parsed, label('Merged'))

      # Step 4: persist the merged analysis.
      merged_path = os.path.join(testdir, b'merged.analysis')

      def write_merged():
        return merged.write_to_path(merged_path)

      self._time(write_merged, label('Wrote merge of'))

      # Step 5: split the merged analysis along the source sets of the parsed analyses.
      sources_per_analysis = [item.stamps.sources.keys() for item in parsed]

      def split_merged():
        return merged.split(sources_per_analysis, catchall=True)

      self._time(split_merged, label('Split'))

      # Step 6: rebase the merged analysis file, using a fresh parser instance.
      rebased_path = os.path.join(testdir, b'rebased.merged.analysis')

      def rebase_merged():
        return ZincAnalysisParser().rebase_from_path(
          merged_path, rebased_path, b'/Users/kermit/src/acme.web', b'$PANTS_HOME')

      self._time(rebase_merged, label('Rebase'))

    print('Total time: %f seconds' % self.total_time)
Пример #3
0
  def _generate_testworthy_splits(self):
    """Take some non-canonical analysis files and generate test data from them.

    The resulting files will be "canonical". That is, merging and re-splitting them will yield
    the same files. Therefore the resulting files can be used as test data (after eyeballing them
    to ensure no pathologies).

    An easy way to generate input for this function is to run a scala compile on some targets using
    --strategy=isolated. Then .pants.d/compile/jvm/scala/isolated-analysis/ will contain a bunch
    of per-target analysis files.

    Those files can be anonymized (see anonymize_analysis.py), ideally with some non-ASCII words
    thrown in (as explained there), and then you can point this function to those anonymized
    files by setting ZINC_ANALYSIS_TEST_DATA_SOURCE=<dir> in the environment and running this test.

    The output is written to a 'canonical' subdirectory of that directory, which is deleted and
    recreated on every run.

    Note: Yes, it's slightly problematic that we're using the very code we're testing to generate
    the test inputs. Hence the need to spot-check for obvious pathologies.

    Raises ValueError if the environment variable naming the source directory is unset or empty.
    """
    original_splits_dir = os.environ.get(_TEST_DATA_SOURCE_ENV_VAR)
    if not original_splits_dir:
      # Fail with an actionable message rather than an opaque error from os.path.join(None, ...).
      raise ValueError('{} must be set to a directory containing analysis files.'.format(
        _TEST_DATA_SOURCE_ENV_VAR))

    # Start from an empty output directory.
    canonical_dir = os.path.join(original_splits_dir, 'canonical')
    if os.path.exists(canonical_dir):
      shutil.rmtree(canonical_dir, ignore_errors=True)
    os.mkdir(canonical_dir)

    # Collect the input analysis files. The freshly created 'canonical' dir is also listed here,
    # but is dropped by the '.analysis' suffix filter.
    original_split_filenames = [f.decode('utf-8') for f in os.listdir(original_splits_dir)]
    original_splits_files = [os.path.join(original_splits_dir, f)
                             for f in original_split_filenames if f.endswith('.analysis')]

    # Merge all the inputs and write out the merged result.
    parser = ZincAnalysisParser()
    original_split_analyses = [parser.parse_from_path(f) for f in original_splits_files]
    merged_analysis = ZincAnalysis.merge(original_split_analyses)
    merged_analysis.write_to_path(os.path.join(canonical_dir, 'all.merged.analysis'))

    # Split the merged analysis back to individual analyses.
    sources_per_analysis = [a.stamps.sources.keys() for a in original_split_analyses]
    # NOTE(review): other call sites pass catchall=True as split()'s second argument; here a
    # directory path is passed positionally. Confirm against split()'s signature that this is
    # intended. If it does enable a catchall, the extra trailing analysis is dropped by zip below.
    split_analyses = merged_analysis.split(sources_per_analysis, os.path.dirname(__file__))
    for original_split_file, split_analysis in zip(original_splits_files, split_analyses):
      outpath = os.path.join(canonical_dir, os.path.basename(original_split_file))
      split_analysis.write_to_path(outpath)

    print('Wrote canonical analysis data to {}'.format(canonical_dir))
Пример #4
0
  def _generate_testworthy_splits(self):
    """Take some non-canonical analysis files and generate test data from them.

    The resulting files will be "canonical". That is, merging and re-splitting them will yield
    the same files. Therefore the resulting files can be used as test data (after eyeballing them
    to ensure no pathologies).

    An easy way to generate input for this function is to run a scala compile on some targets using
    --strategy=isolated. Then .pants.d/compile/jvm/scala/isolated-analysis/ will contain a bunch
    of per-target analysis files.

    Those files can be anonymized (see anonymize_analysis.py), ideally with some non-ASCII words
    thrown in (as explained there), and then you can point this function to those anonymized
    files by setting ZINC_ANALYSIS_TEST_DATA_SOURCE=<dir> in the environment and running this test.

    The output is written to a 'canonical' subdirectory of that directory, which is deleted and
    recreated on every run.

    Note: Yes, it's slightly problematic that we're using the very code we're testing to generate
    the test inputs. Hence the need to spot-check for obvious pathologies.

    Raises ValueError if the environment variable naming the source directory is unset or empty.
    """
    original_splits_dir = os.environ.get(_TEST_DATA_SOURCE_ENV_VAR)
    if not original_splits_dir:
      # Fail with an actionable message rather than an opaque error from os.path.join(None, ...).
      raise ValueError('{} must be set to a directory containing analysis files.'.format(
        _TEST_DATA_SOURCE_ENV_VAR))

    # Start from an empty output directory.
    canonical_dir = os.path.join(original_splits_dir, 'canonical')
    if os.path.exists(canonical_dir):
      shutil.rmtree(canonical_dir, ignore_errors=True)
    os.mkdir(canonical_dir)

    # Collect the input analysis files. The freshly created 'canonical' dir is also listed here,
    # but is dropped by the '.analysis' suffix filter.
    original_split_filenames = [f.decode('utf-8') for f in os.listdir(original_splits_dir)]
    original_splits_files = [os.path.join(original_splits_dir, f)
                             for f in original_split_filenames if f.endswith('.analysis')]

    # Merge all the inputs and write out the merged result.
    parser = ZincAnalysisParser()
    original_split_analyses = [parser.parse_from_path(f) for f in original_splits_files]
    merged_analysis = ZincAnalysis.merge(original_split_analyses)
    merged_analysis.write_to_path(os.path.join(canonical_dir, 'all.merged.analysis'))

    # Split the merged analysis back to individual analyses.
    sources_per_analysis = [a.stamps.sources.keys() for a in original_split_analyses]
    # NOTE(review): other call sites pass catchall=True as split()'s second argument; here a
    # directory path is passed positionally. Confirm against split()'s signature that this is
    # intended. If it does enable a catchall, the extra trailing analysis is dropped by zip below.
    split_analyses = merged_analysis.split(sources_per_analysis, os.path.dirname(__file__))
    for original_split_file, split_analysis in zip(original_splits_files, split_analyses):
      outpath = os.path.join(canonical_dir, os.path.basename(original_split_file))
      split_analysis.write_to_path(outpath)

    print('Wrote canonical analysis data to {}'.format(canonical_dir))
Пример #5
0
  def test_complex(self):
    """Round-trip the 'complex.zip' analyses through write, merge, re-read and split.

    Runs with ZINCUTILS_SORTED_ANALYSIS='1' in the environment. If the environment variable named
    by _TEST_DATA_SOURCE_ENV_VAR is set, no checks are performed: canonical test data is generated
    via _generate_testworthy_splits instead and the test returns.

    Otherwise the test checks that:
      - the merged analysis equals itself after being written out and parsed back in,
      - the expected 'all.merged.analysis' equals that re-read merged analysis,
      - splitting the re-read merged analysis (with a catchall) yields an empty catchall, and
      - each original analysis equals the split it corresponds to.
    """
    with environment_as(ZINCUTILS_SORTED_ANALYSIS='1'):
      if os.environ.get(_TEST_DATA_SOURCE_ENV_VAR):
        print('\n>>>>>>>>> {} set: skipping test, generating canonical test data instead.'.format(
          _TEST_DATA_SOURCE_ENV_VAR))
        self._generate_testworthy_splits()
        return

      parser = ZincAnalysisParser()

      def assert_same_analysis(expected, actual):
        # On failure, the assertion message is the concatenation of the diffs between the two.
        diffs = expected.diff(actual)
        self.assertTrue(expected.is_equal_to(actual),
                        ''.join([unicode(diff) for diff in diffs]))

      with _temp_test_dir('complex.zip') as testdir:
        # Parse analysis files, skipping any pre-merged ones (e.g., the expected merge result).
        analysis_files = [os.path.join(testdir, f)
                          for f in os.listdir(testdir)
                          if f.endswith(b'.analysis') and not f.endswith(b'.merged.analysis')]
        num_analyses = len(analysis_files)

        def parse(f):
          return parser.parse_from_path(f)

        analyses = self._time(lambda: [parse(f) for f in analysis_files],
                              'Parsed %d files' % num_analyses)

        # Write them back out individually.
        writeout_dir = os.path.join(testdir, b'write')
        os.mkdir(writeout_dir)
        def write(file_name, analysis):
          outpath = os.path.join(writeout_dir, file_name)
          analysis.write_to_path(outpath)

        def _write_all():
          for analysis_file, analysis in zip(analysis_files, analyses):
            write(os.path.basename(analysis_file), analysis)

        self._time(_write_all, 'Wrote %d files' % num_analyses)

        # Merge them.
        merged_analysis = self._time(lambda: ZincAnalysis.merge(analyses),
                                     'Merged %d files' % num_analyses)

        # Write merged analysis to file.
        merged_analysis_path = os.path.join(writeout_dir, b'merged.analysis')
        self._time(lambda: merged_analysis.write_to_path(merged_analysis_path),
                   'Wrote merged analysis to %s' % merged_analysis_path)

        # Read merged analysis from file.
        merged_analysis2 = self._time(lambda: parser.parse_from_path(merged_analysis_path),
                                      'Read merged analysis from %s' % merged_analysis_path)

        # Read the expected merged analysis from file.
        expected_merged_analysis_path = os.path.join(testdir, b'all.merged.analysis')
        expected_merged_analysis = self._time(
          lambda: parser.parse_from_path(expected_merged_analysis_path),
          'Read expected merged analysis from %s' % expected_merged_analysis_path)

        # Compare the merge result with the re-read one.
        assert_same_analysis(merged_analysis, merged_analysis2)

        # Compare the merge result with the expected.
        assert_same_analysis(expected_merged_analysis, merged_analysis2)

        # Split the merged analysis back to individual analyses.
        sources_per_analysis = [a.stamps.sources.keys() for a in analyses]
        split_analyses = self._time(lambda: merged_analysis2.split(
          sources_per_analysis, catchall=True),
          'Split back into %d analyses' % num_analyses)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(num_analyses + 1, len(split_analyses))  # +1 for the catchall.
        catchall_analysis = split_analyses[-1]

        # We expect an empty catchall.
        self.assertEqual(0, len(catchall_analysis.stamps.sources))

        # Diff the original analyses and the split ones.

        # Write the split to the tmpdir, for ease of debugging on failure.
        splits_dir = os.path.join(testdir, b'splits')
        os.mkdir(splits_dir)
        # zip stops at the shortest input, so the trailing catchall split is not compared here.
        for analysis_file, analysis, split_analysis in zip(analysis_files, analyses, split_analyses):
          outfile_path = os.path.join(splits_dir, os.path.basename(analysis_file))
          split_analysis.write_to_path(outfile_path)
          # Note that it's not true in general that merging splits and then splitting them back out
          # should yield the exact same analysis. Some small differences can happen. For example:
          # splitA may have an external src->class on a class from a source file in splitB; When
          # merging, that becomes a src->src dependency; And when splitting back out that src
          # dependency becomes a dependency on a representative class, which may not be
          # the original class SplitA depended on.
          #
          # This comparison works here only because we've taken care to prepare test data for which
          # it should hold. See _generate_testworthy_splits for how to do so.
          assert_same_analysis(analysis, split_analysis)

      print('Total time: %f seconds' % self.total_time)
Пример #6
0
  def test_complex(self):
    """Round-trip the 'complex.zip' analyses through write, merge, re-read and split.

    Runs with ZINCUTILS_SORTED_ANALYSIS='1' in the environment. If the environment variable named
    by _TEST_DATA_SOURCE_ENV_VAR is set, no checks are performed: canonical test data is generated
    via _generate_testworthy_splits instead and the test returns.

    Otherwise the test checks that:
      - the merged analysis equals itself after being written out and parsed back in,
      - the expected 'all.merged.analysis' equals that re-read merged analysis,
      - splitting the re-read merged analysis (with a catchall) yields an empty catchall, and
      - each original analysis equals the split it corresponds to.
    """
    with environment_as(ZINCUTILS_SORTED_ANALYSIS='1'):
      if os.environ.get(_TEST_DATA_SOURCE_ENV_VAR):
        print('\n>>>>>>>>> {} set: skipping test, generating canonical test data instead.'.format(
          _TEST_DATA_SOURCE_ENV_VAR))
        self._generate_testworthy_splits()
        return

      parser = ZincAnalysisParser()

      def assert_same_analysis(expected, actual):
        # On failure, the assertion message is the concatenation of the diffs between the two.
        diffs = expected.diff(actual)
        self.assertTrue(expected.is_equal_to(actual),
                        ''.join([unicode(diff) for diff in diffs]))

      with _temp_test_dir('complex.zip') as testdir:
        # Parse analysis files, skipping any pre-merged ones (e.g., the expected merge result).
        analysis_files = [os.path.join(testdir, f)
                          for f in os.listdir(testdir)
                          if f.endswith(b'.analysis') and not f.endswith(b'.merged.analysis')]
        num_analyses = len(analysis_files)

        def parse(f):
          return parser.parse_from_path(f)

        analyses = self._time(lambda: [parse(f) for f in analysis_files],
                              'Parsed %d files' % num_analyses)

        # Write them back out individually.
        writeout_dir = os.path.join(testdir, b'write')
        os.mkdir(writeout_dir)
        def write(file_name, analysis):
          outpath = os.path.join(writeout_dir, file_name)
          analysis.write_to_path(outpath)

        def _write_all():
          for analysis_file, analysis in zip(analysis_files, analyses):
            write(os.path.basename(analysis_file), analysis)

        self._time(_write_all, 'Wrote %d files' % num_analyses)

        # Merge them.
        merged_analysis = self._time(lambda: ZincAnalysis.merge(analyses),
                                     'Merged %d files' % num_analyses)

        # Write merged analysis to file.
        merged_analysis_path = os.path.join(writeout_dir, b'merged.analysis')
        self._time(lambda: merged_analysis.write_to_path(merged_analysis_path),
                   'Wrote merged analysis to %s' % merged_analysis_path)

        # Read merged analysis from file.
        merged_analysis2 = self._time(lambda: parser.parse_from_path(merged_analysis_path),
                                      'Read merged analysis from %s' % merged_analysis_path)

        # Read the expected merged analysis from file.
        expected_merged_analysis_path = os.path.join(testdir, b'all.merged.analysis')
        expected_merged_analysis = self._time(
          lambda: parser.parse_from_path(expected_merged_analysis_path),
          'Read expected merged analysis from %s' % expected_merged_analysis_path)

        # Compare the merge result with the re-read one.
        assert_same_analysis(merged_analysis, merged_analysis2)

        # Compare the merge result with the expected.
        assert_same_analysis(expected_merged_analysis, merged_analysis2)

        # Split the merged analysis back to individual analyses.
        sources_per_analysis = [a.stamps.sources.keys() for a in analyses]
        split_analyses = self._time(lambda: merged_analysis2.split(
          sources_per_analysis, catchall=True),
          'Split back into %d analyses' % num_analyses)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(num_analyses + 1, len(split_analyses))  # +1 for the catchall.
        catchall_analysis = split_analyses[-1]

        # We expect an empty catchall.
        self.assertEqual(0, len(catchall_analysis.stamps.sources))

        # Diff the original analyses and the split ones.

        # Write the split to the tmpdir, for ease of debugging on failure.
        splits_dir = os.path.join(testdir, b'splits')
        os.mkdir(splits_dir)
        # zip stops at the shortest input, so the trailing catchall split is not compared here.
        for analysis_file, analysis, split_analysis in zip(analysis_files, analyses, split_analyses):
          outfile_path = os.path.join(splits_dir, os.path.basename(analysis_file))
          split_analysis.write_to_path(outfile_path)
          # Note that it's not true in general that merging splits and then splitting them back out
          # should yield the exact same analysis. Some small differences can happen. For example:
          # splitA may have an external src->class on a class from a source file in splitB; When
          # merging, that becomes a src->src dependency; And when splitting back out that src
          # dependency becomes a dependency on a representative class, which may not be
          # the original class SplitA depended on.
          #
          # This comparison works here only because we've taken care to prepare test data for which
          # it should hold. See _generate_testworthy_splits for how to do so.
          assert_same_analysis(analysis, split_analysis)

      print('Total time: %f seconds' % self.total_time)