Exemplo n.º 1
0
def _fingerprint_files_in_tar(file_list, tar_location):
    """Return a SHA-1 hex digest over the paths and contents of the given files.

    :param file_list: object with a ``dependencies`` iterable of file entries
      (each having a ``path`` attribute) — presumably project File nodes; TODO confirm.
    :param tar_location: path to an existing tar archive containing those files.
    :returns: hex digest string that changes if any path or content changes.
    """
    hasher = sha1()
    with open_tar(tar_location, mode='r', errorlevel=1) as tar:
        for file in file_list.dependencies:
            # Hash the path too, so renames change the fingerprint.
            # BUG FIX: hashlib requires bytes on Python 3; encode the str path.
            hasher.update(file.path.encode('utf-8'))
            hasher.update(tar.extractfile(file.path).read())
    return hasher.hexdigest()
Exemplo n.º 2
0
    def capture(self, run_info_dict):
        """Write a gzipped repro tarball of the buildroot plus a generated repro.sh."""
        # Force the scm discovery logging messages to appear before ours, so the startup delay
        # is properly associated in the user's mind with us and not with scm.
        logger.info(f'Capturing repro information to {self._path}')
        with open_tar(self._path, 'w:gz', dereference=True,
                      compresslevel=6) as tarout:
            entries = (e for e in os.listdir(self._buildroot) if e not in self._ignore)
            for entry in entries:
                tarout.add(os.path.join(self._buildroot, entry), entry)

            with temporary_file(binary_mode=False) as script:
                script.write(
                    '# Pants repro captured for the following build:\n')
                for key, value in sorted(run_info_dict.items()):
                    script.write(f'#  {key}: {value}\n')
                argv = list(sys.argv)
                # Use 'pants' instead of whatever the full executable path was on the user's system.
                argv[0] = 'pants'
                # Remove any repro-related flags. The repro-ing user won't want to call those.
                argv = [arg for arg in argv if not arg.startswith('--repro-')]
                script.write("'" + "' '".join(argv) + "'\n")
                script.flush()
                chmod_plus_x(script.name)
                tarout.add(script.name, 'repro.sh')
Exemplo n.º 3
0
 def extract(self):
     """Extract this artifact's tarball under the artifact root, recording relpaths.

     :raises ArtifactError: if the tarball is corrupt or unreadable.
     """
     try:
         with open_tar(self._tarfile, 'r', errorlevel=2) as tarin:
             # Note: We create all needed paths proactively, even though extractall() can do this for us.
             # This is because we may be called concurrently on multiple artifacts that share directories,
             # and there will be a race condition inside extractall(): task T1 A) sees that a directory
             # doesn't exist and B) tries to create it. But in the gap between A) and B) task T2 creates
             # the same directory, so T1 throws "File exists" in B).
             # This actually happened, and was very hard to debug.
             # Creating the paths here up front allows us to squelch that "File exists" error.
             paths = []
             dirs = set()
             for tarinfo in tarin.getmembers():
                 paths.append(tarinfo.name)
                 if tarinfo.isdir():
                     dirs.add(tarinfo.name)
                 else:
                     dirs.add(os.path.dirname(tarinfo.name))
             for d in dirs:
                 try:
                     os.makedirs(os.path.join(self._artifact_root, d))
                 except OSError as e:
                     # Concurrent creation of the same dir is fine (see note above).
                     if e.errno != errno.EEXIST:
                         raise
             tarin.extractall(self._artifact_root)
             self._relpaths.update(paths)
     except tarfile.ReadError as e:
         # BUG FIX: Exception.message does not exist on Python 3; use str(e).
         raise ArtifactError(str(e))
Exemplo n.º 4
0
    def test_cache_no_dereference_no_file(self):
        """A dangling symlink should round-trip through the cache still as a symlink."""
        self._prepare_task(deference=False,
                           regular_file=False,
                           regular_file_in_results_dir=False)

        all_vts = self.task.execute()
        self.assertGreater(len(all_vts), 0)
        for vt in all_vts:
            artifact_address = self._get_artifact_path(vt)
            with temporary_dir() as extract_root:
                with open_tar(artifact_address, 'r') as tar:
                    tar.extractall(path=extract_root)

                link_path = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
                self.assertIsNotNone(
                    link_path, "Cannot find file {} in artifact {}".format(
                        SYMLINK_NAME, artifact_address))
                self.assertTrue(
                    os.path.islink(link_path),
                    "{} in artifact {} should be a symlink but it is not.".
                    format(SYMLINK_NAME, artifact_address))
                # The destination of the symlink should be non-existent, hence IOError.
                with self.assertRaises(IOError):
                    with open(link_path, 'r') as f:
                        f.read()
Exemplo n.º 5
0
def _fingerprint_files_in_tar(file_list, tar_location):
  """Return a SHA-1 hex digest over the paths and contents of the given files,
  read from the tar archive at ``tar_location``.
  """
  hasher = sha1()
  with open_tar(tar_location, mode='r') as tar:
    for file in file_list.dependencies:
      # Hash the path too, so renames change the fingerprint.
      # BUG FIX: hashlib requires bytes on Python 3; encode the str path.
      hasher.update(file.path.encode('utf-8'))
      hasher.update(tar.extractfile(file.path).read())
  return hasher.hexdigest()
Exemplo n.º 6
0
 def extract(self):
   """Extract this artifact's tarball under the artifact root, recording relpaths.

   :raises ArtifactError: if the tarball is corrupt or unreadable.
   """
   try:
     with open_tar(self._tarfile, 'r', errorlevel=2) as tarin:
       # Note: We create all needed paths proactively, even though extractall() can do this for us.
       # This is because we may be called concurrently on multiple artifacts that share directories,
       # and there will be a race condition inside extractall(): task T1 A) sees that a directory
       # doesn't exist and B) tries to create it. But in the gap between A) and B) task T2 creates
       # the same directory, so T1 throws "File exists" in B).
       # This actually happened, and was very hard to debug.
       # Creating the paths here up front allows us to squelch that "File exists" error.
       paths = []
       dirs = set()
       for tarinfo in tarin.getmembers():
         paths.append(tarinfo.name)
         if tarinfo.isdir():
           dirs.add(tarinfo.name)
         else:
           dirs.add(os.path.dirname(tarinfo.name))
       for d in dirs:
         try:
           os.makedirs(os.path.join(self._artifact_root, d))
         except OSError as e:
           # Concurrent creation of the same dir is fine (see note above).
           if e.errno != errno.EEXIST:
             raise
       tarin.extractall(self._artifact_root)
       self._relpaths.update(paths)
   except tarfile.ReadError as e:
     # BUG FIX: Exception.message does not exist on Python 3; use str(e).
     raise ArtifactError(str(e))
Exemplo n.º 7
0
 def create(self, basedir, outdir, name, prefix=None):
     """Tar up ``basedir`` into ``outdir`` as ``name`` and return the tar's path."""
     tarpath = os.path.join(
         outdir, '%s.%s' % (name.decode('utf-8'), self.extension))
     with open_tar(tarpath, self.mode, dereference=True,
                   errorlevel=1) as tar:
         # Inputs arrive as bytes; decode before handing to tarfile.
         tar.add(basedir.decode('utf-8'), arcname=prefix or '.')
     return tarpath
Exemplo n.º 8
0
 def create(self, basedir, outdir, name, prefix=None):
     """Tar up ``basedir`` into ``outdir`` as ``name`` and return the tar's path."""
     root = ensure_text(basedir)
     tarpath = os.path.join(outdir,
                            '%s.%s' % (ensure_text(name), self.extension))
     with open_tar(tarpath, self.mode, dereference=True,
                   errorlevel=1) as tar:
         tar.add(root, arcname=prefix or '.')
     return tarpath
Exemplo n.º 9
0
 def create(self, basedir, outdir, name, prefix=None):
   """Tar up ``basedir`` into ``outdir`` as ``name`` and return the tar's path.

   :API: public
   """
   root = ensure_text(basedir)
   tarpath = os.path.join(outdir, '{}.{}'.format(ensure_text(name), self.extension))
   with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
     tar.add(root, arcname=prefix or '.')
   return tarpath
Exemplo n.º 10
0
 def collect(self, paths):
   """Add each given path (dirs recurse) to the artifact tarball, tracking relpaths."""
   # In our tests, gzip is slightly less compressive than bzip2 on .class files,
   # but decompression times are much faster.
   mode = 'w:gz' if self._compress else 'w'
   with open_tar(self._tarfile, mode, dereference=True, errorlevel=2) as tarball:
     for entry in paths or ():
       # Adds dirs recursively.
       rel = os.path.relpath(entry, self._artifact_root)
       tarball.add(entry, rel)
       self._relpaths.add(rel)
Exemplo n.º 11
0
  def create(self, basedir, outdir, name, prefix=None, dereference=True):
    """Tar up ``basedir`` into ``outdir`` as ``name`` and return the tar's path.

    :API: public
    """

    root = ensure_text(basedir)
    tarpath = os.path.join(outdir, '{}.{}'.format(ensure_text(name), self.extension))
    with open_tar(tarpath, self.mode, dereference=dereference, errorlevel=1) as tar:
      tar.add(root, arcname=prefix or '.')
    return tarpath
Exemplo n.º 12
0
 def collect(self, paths):
     """Add each given path (dirs recurse) to the artifact tarball, tracking relpaths."""
     # In our tests, gzip is slightly less compressive than bzip2 on .class files,
     # but decompression times are much faster.
     mode = 'w:gz' if self._compress else 'w'
     with open_tar(self._tarfile, mode, dereference=True,
                   errorlevel=2) as tarball:
         for entry in paths or ():
             # Adds dirs recursively.
             rel = os.path.relpath(entry, self._artifact_root)
             tarball.add(entry, rel)
             self._relpaths.add(rel)
Exemplo n.º 13
0
  def collect(self, paths):
    """Add each given path (dirs recurse) to the gzipped tarball, tracking relpaths."""
    # In our tests, gzip is slightly less compressive than bzip2 on .class files,
    # but decompression times are much faster.
    mode = 'w:gz'

    open_kwargs = dict(
      dereference=self._dereference,
      errorlevel=2,
      compresslevel=self._compression,
    )

    with open_tar(self._tarfile, mode, **open_kwargs) as tarball:
      for entry in paths or ():
        # Adds dirs recursively.
        rel = os.path.relpath(entry, self._artifact_root)
        tarball.add(entry, rel)
        self._relpaths.add(rel)
Exemplo n.º 14
0
    def collect(self, paths):
        """Add each given path (dirs recurse) to the gzipped tarball, tracking relpaths."""
        # In our tests, gzip is slightly less compressive than bzip2 on .class files,
        # but decompression times are much faster.
        mode = 'w:gz'

        tar_options = {
            'dereference': True,
            'errorlevel': 2,
            'compresslevel': self._compression,
        }

        with open_tar(self._tarfile, mode, **tar_options) as tarball:
            for entry in paths or ():
                # Adds dirs recursively.
                rel = os.path.relpath(entry, self._artifact_root)
                tarball.add(entry, rel)
                self._relpaths.add(rel)
Exemplo n.º 15
0
def _create_snapshot_archive(file_list, step_context):
  """Tar up the given files from the build root and return a fingerprinted Snapshot.

  The archive is written to a temp path first, fingerprinted, then moved to the
  content-addressed location under the snapshot archive root.
  """
  logger.debug('snapshotting files: {}'.format(file_list))

  # Constructs the snapshot tar in a temporary location, then fingerprints it and moves it to the final path.
  with temporary_file_path(cleanup=False) as tmp_path:
    with open_tar(tmp_path, mode='w') as tar:
      for file in file_list.dependencies:
        # TODO: handle GitProjectTree. Using add() like this will fail with a non-filesystem project tree.
        tar.add(os.path.join(step_context.project_tree.build_root, file.path), file.path)
    snapshot = Snapshot(_fingerprint_files_in_tar(file_list, tmp_path))
  tar_location = _snapshot_path(snapshot, step_context.snapshot_archive_root)

  # Move (not copy) so the temp file doesn't linger; the fingerprint names the final path.
  shutil.move(tmp_path, tar_location)

  return snapshot
Exemplo n.º 16
0
def _create_snapshot_archive(file_list, step_context):
  """Tar up the given files from the build root and return a fingerprinted Snapshot.

  The archive is written to a temp path first, fingerprinted, then moved to the
  content-addressed location under the snapshot archive root.
  """
  logger.debug('snapshotting files: {}'.format(file_list))

  # Constructs the snapshot tar in a temporary location, then fingerprints it and moves it to the final path.
  with temporary_file_path(cleanup=False) as tmp_path:
    with open_tar(tmp_path, mode='w') as tar:
      for file in file_list.dependencies:
        # TODO: handle GitProjectTree. Using add() like this will fail with a non-filesystem project tree.
        tar.add(os.path.join(step_context.project_tree.build_root, file.path), file.path)
    snapshot = Snapshot(_fingerprint_files_in_tar(file_list, tmp_path))
  tar_location = _snapshot_path(snapshot, step_context.snapshot_archive_root)

  # Move (not copy) so the temp file doesn't linger; the fingerprint names the final path.
  shutil.move(tmp_path, tar_location)

  return snapshot
 def _assert_dereferenced_symlink_in_cache(self, all_vts):
   """Assert the symlink was dereferenced into a regular file in the cache tarball."""
   for vt in all_vts:
     artifact_address = self._get_artifact_path(vt)
     with temporary_dir() as extract_root:
       with open_tar(artifact_address, 'r') as tar:
         tar.extractall(path=extract_root)
       found = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
       self.assertIsNotNone(found, "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
       self.assertFalse(
         os.path.islink(found),
         "{} in artifact {} should not be a symlink but it is.".format(SYMLINK_NAME, artifact_address))
       with open(found, 'r') as f:
         self.assertEqual(DUMMY_FILE_CONTENT, f.read())
 def _assert_dereferenced_symlink_in_cache(self, all_vts):
   """Assert the symlink was dereferenced into a regular file in the cache tarball."""
   for vt in all_vts:
     artifact_address = self._get_artifact_path(vt)
     with temporary_dir() as extract_root:
       with open_tar(artifact_address, 'r') as tar:
         tar.extractall(path=extract_root)
       found = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
       self.assertIsNotNone(found, "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
       self.assertFalse(
         os.path.islink(found),
         "{} in artifact {} should not be a symlink but it is.".format(SYMLINK_NAME, artifact_address))
       # Binary read: DUMMY_FILE_CONTENT is presumably bytes here — TODO confirm.
       with open(found, 'rb') as f:
         self.assertEqual(DUMMY_FILE_CONTENT, f.read())
Exemplo n.º 19
0
  def parse_tar(self, context, target, expected_image):
    """Check the docker_image product for ``target`` and yield the saved image tar.

    :raises AssertionError: if the product data doesn't name ``expected_image``.
    """
    docker_image_products = context.products.get('docker_image')
    self.assertIsNotNone(docker_image_products)
    product_data = docker_image_products.get(target)
    self.assertEqual(1, len(product_data))
    # BUG FIX: dict views are not indexable on Python 3; take the sole entry
    # via next(iter(...)) instead of .items()[0].
    result_dir, result_keys = next(iter(product_data.items()))
    self.assertEqual(['docker_image_name'], result_keys)
    image_name_file = os.path.join(result_dir, result_keys[0])
    with open(image_name_file, 'r') as f:
      result_image_name = f.read()
    self.assertEqual(expected_image, result_image_name)

    with temporary_dir() as result_td:
      result_tar = os.path.join(result_td, 'contents.tar')
      subprocess.check_call(['docker', 'save', '--output=' + result_tar, result_image_name])

      with open_tar(result_tar) as tar:
        yield tar
  def test_cache_no_dereference_file_inside_results_dir(self):
    """A symlink to a file inside the results dir should stay a symlink in the cache."""
    self._prepare_task(deference=False, regular_file=True, regular_file_in_results_dir=True)

    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
      artifact_address = self._get_artifact_path(vt)
      with temporary_dir() as extract_root:
        with open_tar(artifact_address, 'r') as tar:
          tar.extractall(path=extract_root)

        found = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
        self.assertIsNotNone(found, "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
        self.assertTrue(
          os.path.islink(found),
          "{} in artifact {} should be a symlink but it is not.".format(SYMLINK_NAME, artifact_address)
        )
        with open(found, 'rb') as f:
          self.assertEqual(DUMMY_FILE_CONTENT, f.read())
  def test_cache_no_dereference_file_inside_results_dir(self):
    """A symlink to a file inside the results dir should stay a symlink in the cache."""
    self._prepare_task(deference=False, regular_file=True, regular_file_in_results_dir=True)

    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
      artifact_address = self._get_artifact_path(vt)
      with temporary_dir() as extract_root:
        with open_tar(artifact_address, 'r') as tar:
          tar.extractall(path=extract_root)

        found = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
        self.assertIsNotNone(found, "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
        self.assertTrue(
          os.path.islink(found),
          "{} in artifact {} should be a symlink but it is not.".format(SYMLINK_NAME, artifact_address)
        )
        with open(found, 'r') as f:
          self.assertEqual(DUMMY_FILE_CONTENT, f.read())
Exemplo n.º 22
0
    def test_repro(self):
        """End-to-end: Repro.capture archives the buildroot minus ignored dirs."""
        with temporary_dir() as tmpdir:
            fake_buildroot = os.path.join(tmpdir, 'buildroot')

            def add_file(path, content):
                # Helper: create a file (and parent dirs) under the fake buildroot.
                fullpath = os.path.join(fake_buildroot, path)
                safe_mkdir_for(fullpath)
                with open(fullpath, 'w') as outfile:
                    outfile.write(content)

            add_file('.git/foo', 'foo')
            add_file('dist/bar', 'bar')
            add_file('baz.txt', 'baz')
            add_file('qux/quux.txt', 'quux')

            repro_file = os.path.join(tmpdir, 'repro.tar.gz')
            repro = Repro(repro_file, fake_buildroot, ['.git', 'dist'])
            repro.capture(run_info_dict={'foo': 'bar', 'baz': 'qux'})

            extract_dir = os.path.join(tmpdir, 'extract')
            with open_tar(repro_file, 'r:gz') as tar:
                tar.extractall(extract_dir)

            def assert_not_exists(relpath):
                # Helper: ignored paths must not appear in the extracted tree.
                fullpath = os.path.join(extract_dir, relpath)
                self.assertFalse(os.path.exists(fullpath))

            def assert_file(relpath, expected_content=None):
                # Helper: captured files must exist, optionally with exact content.
                fullpath = os.path.join(extract_dir, relpath)
                self.assertTrue(os.path.isfile(fullpath))
                if expected_content:
                    with open(fullpath, 'r') as infile:
                        content = infile.read()
                    # BUG FIX: assertEquals is a deprecated alias (removed in
                    # Python 3.12); use assertEqual.
                    self.assertEqual(expected_content, content)

            assert_file('baz.txt', 'baz')
            assert_file('qux/quux.txt', 'quux')
            assert_file('repro.sh')

            assert_not_exists('.git')
            assert_not_exists('dist')
  def test_cache_no_dereference_no_file(self):
    """A dangling symlink should round-trip through the cache still as a symlink."""
    self._prepare_task(deference=False, regular_file=False, regular_file_in_results_dir=False)

    all_vts = self.task.execute()
    self.assertGreater(len(all_vts), 0)
    for vt in all_vts:
      artifact_address = self._get_artifact_path(vt)
      with temporary_dir() as extract_root:
        with open_tar(artifact_address, 'r') as tar:
          tar.extractall(path=extract_root)

        link_path = self._find_first_file_in_path(extract_root, SYMLINK_NAME)
        self.assertIsNotNone(link_path, "Cannot find file {} in artifact {}".format(SYMLINK_NAME, artifact_address))
        self.assertTrue(
          os.path.islink(link_path),
          "{} in artifact {} should be a symlink but it is not.".format(SYMLINK_NAME, artifact_address)
        )
        # The destination of the symlink should be non-existent, hence IOError.
        with self.assertRaises(IOError):
          with open(link_path, 'rb') as f:
            f.read()
Exemplo n.º 24
0
  def test_repro(self):
    """End-to-end: Repro.capture archives the buildroot minus ignored dirs."""
    with temporary_dir() as tmpdir:
      fake_buildroot = os.path.join(tmpdir, 'buildroot')
      def add_file(path, content):
        # Helper: create a file (and parent dirs) under the fake buildroot.
        fullpath = os.path.join(fake_buildroot, path)
        safe_mkdir_for(fullpath)
        with open(fullpath, 'w') as outfile:
          outfile.write(content)

      add_file('.git/foo', 'foo')
      add_file('dist/bar', 'bar')
      add_file('baz.txt', 'baz')
      add_file('qux/quux.txt', 'quux')

      repro_file = os.path.join(tmpdir, 'repro.tar.gz')
      repro = Repro(repro_file, fake_buildroot, ['.git', 'dist'])
      repro.capture(run_info_dict={'foo': 'bar', 'baz': 'qux'})

      extract_dir = os.path.join(tmpdir, 'extract')
      with open_tar(repro_file, 'r:gz') as tar:
        tar.extractall(extract_dir)

      def assert_not_exists(relpath):
        # Helper: ignored paths must not appear in the extracted tree.
        fullpath = os.path.join(extract_dir, relpath)
        self.assertFalse(os.path.exists(fullpath))

      def assert_file(relpath, expected_content=None):
        # Helper: captured files must exist, optionally with exact content.
        fullpath = os.path.join(extract_dir, relpath)
        self.assertTrue(os.path.isfile(fullpath))
        if expected_content:
          with open(fullpath, 'r') as infile:
            content = infile.read()
          # BUG FIX: assertEquals is a deprecated alias (removed in Python 3.12);
          # use assertEqual.
          self.assertEqual(expected_content, content)

      assert_file('baz.txt', 'baz')
      assert_file('qux/quux.txt', 'quux')
      assert_file('repro.sh')

      assert_not_exists('.git')
      assert_not_exists('dist')
Exemplo n.º 25
0
  def capture(self, run_info_dict):
    """Write a gzipped repro tarball of the buildroot plus a generated repro.sh."""
    # Force the scm discovery logging messages to appear before ours, so the startup delay
    # is properly associated in the user's mind with us and not with scm.
    logger.info('Capturing repro information to {}'.format(self._path))
    with open_tar(self._path, 'w:gz', dereference=True, compresslevel=6) as tarout:
      entries = (e for e in os.listdir(self._buildroot) if e not in self._ignore)
      for entry in entries:
        tarout.add(os.path.join(self._buildroot, entry), entry)

      with temporary_file() as script:
        script.write('# Pants repro captured for the following build:\n')
        for key, value in sorted(run_info_dict.items()):
          script.write('#  {}: {}\n'.format(key, value))
        argv = list(sys.argv)
        # Use 'pants' instead of whatever the full executable path was on the user's system.
        argv[0] = 'pants'
        # Remove any repro-related flags. The repro-ing user won't want to call those.
        argv = [arg for arg in argv if not arg.startswith('--repro-')]
        script.write("'" + "' '".join(argv) + "'\n")
        script.flush()
        chmod_plus_x(script.name)
        tarout.add(script.name, 'repro.sh')
Exemplo n.º 26
0
 def _extract(cls, path, outdir, **kwargs):
     """Extract every member of the tar archive at ``path`` into ``outdir``."""
     with open_tar(path, errorlevel=1) as archive:
         archive.extractall(outdir)
Exemplo n.º 27
0
 def _extract(self, path_or_file, outdir, **kwargs):
   """Extract every member of the given tar (path or file object) into ``outdir``."""
   with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
     archive.extractall(outdir)
Exemplo n.º 28
0
 def create(self, basedir, outdir, name, prefix=None):
   """Tar up ``basedir`` into ``outdir`` as ``name`` and return the tar's path."""
   tarpath = os.path.join(outdir, '%s.%s' % (name.decode('utf-8'), self.extension))
   with open_tar(tarpath, self.mode, dereference=True, errorlevel=1) as tar:
     # Inputs arrive as bytes; decode before handing to tarfile.
     tar.add(basedir.decode('utf-8'), arcname=prefix or '.')
   return tarpath
Exemplo n.º 29
0
def _extract_snapshot(step_context, snapshot, sandbox_dir, subject):
  """Unpack the archived snapshot into the sandbox directory and log the result."""
  archive_path = _snapshot_path(snapshot, step_context.snapshot_archive_root)
  with open_tar(archive_path, errorlevel=1) as tar:
    tar.extractall(sandbox_dir)
  logger.debug('extracted {} snapshot to {}'.format(subject, sandbox_dir))
Exemplo n.º 30
0
 def extract(cls, path, outdir):
   """Extract the tar archive at ``path`` into ``outdir``.

   :API: public
   """
   with open_tar(path, errorlevel=1) as archive:
     archive.extractall(outdir)
Exemplo n.º 31
0
 def _extract(self, path_or_file, outdir, **kwargs):
   """Extract the given tar (path or file object) into ``outdir``."""
   with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
     # Python 2's tarfile wants a bytes destination path.
     dest = outdir.encode('utf-8') if PY2 else outdir
     archive.extractall(dest)
Exemplo n.º 32
0
 def _extract(self, path_or_file, outdir, **kwargs):
     """Extract the given tar (path or file object) into ``outdir``."""
     with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
         # Python 2's tarfile wants a bytes destination path.
         dest = outdir.encode('utf-8') if PY2 else outdir
         archive.extractall(dest)
Exemplo n.º 33
0
 def assert_archive_files(self, expected_archive_files, snapshot, snapshot_archive_root):
   """Assert the snapshot archive contains exactly the expected member names."""
   with open_tar(_snapshot_path(snapshot, snapshot_archive_root), errorlevel=1) as tar:
     actual = sorted(tar.getnames())
     self.assertEqual(sorted(expected_archive_files), actual)
Exemplo n.º 34
0
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir):
    """Unpack the archived snapshot into the sandbox directory."""
    archive_path = _snapshot_path(snapshot, snapshot_archive_root)
    with open_tar(archive_path, errorlevel=1) as tar:
        tar.extractall(sandbox_dir)
Exemplo n.º 35
0
 def _extract(self, path_or_file, outdir, **kwargs):
   """Extract the given tar (path or file object) into ``outdir``."""
   with open_tar(path_or_file, errorlevel=1, **kwargs) as archive:
     archive.extractall(outdir)
Exemplo n.º 36
0
 def assert_archive_files(self, expected_archive_files, snapshot,
                          snapshot_archive_root):
     """Assert the snapshot archive contains exactly the expected member names."""
     with open_tar(_snapshot_path(snapshot, snapshot_archive_root),
                   errorlevel=1) as tar:
         actual = sorted(tar.getnames())
         self.assertEqual(sorted(expected_archive_files), actual)
Exemplo n.º 37
0
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir, subject):
  """Unpack the archived snapshot into the sandbox directory and log the result."""
  archive_path = _snapshot_path(snapshot, snapshot_archive_root)
  with open_tar(archive_path, errorlevel=1) as tar:
    tar.extractall(sandbox_dir)
  logger.debug('extracted {} snapshot to {}'.format(subject, sandbox_dir))
Exemplo n.º 38
0
 def extract(cls, path, outdir):
     """Extract the tar archive at ``path`` into ``outdir``.

     :API: public
     """
     with open_tar(path, errorlevel=1) as archive:
         archive.extractall(outdir)
Exemplo n.º 39
0
def _extract_snapshot(snapshot_archive_root, snapshot, sandbox_dir):
  """Unpack the archived snapshot into the sandbox directory."""
  archive_path = _snapshot_path(snapshot, snapshot_archive_root)
  with open_tar(archive_path, errorlevel=1) as tar:
    tar.extractall(sandbox_dir)
Exemplo n.º 40
0
 def extract(cls, path, outdir):
   """Extract the tar archive at ``path`` into ``outdir``."""
   with open_tar(path, errorlevel=1) as archive:
     archive.extractall(outdir)
Exemplo n.º 41
0
 def assert_archive_files(self, expected_archive_files, snapshot, step_context):
   """Assert the snapshot archive members match the expected paths, in order."""
   with open_tar(_snapshot_path(snapshot, step_context.snapshot_archive_root), errorlevel=1) as tar:
     member_paths = [tar_info.path for tar_info in tar.getmembers()]
     self.assertEqual(expected_archive_files, member_paths)