Example #1
 def upload_from_string(self, s):
     """Utility method for uploading this blob; not used by the
     GoogleCloudStorage backend, but used to pre-populate the GCS
     mock for testing
     """
     self.updated = utcnow()
     self._content = s
     self.bucket._upload_blob(self)
 def _make_package(self, *args, **kwargs):
     """ Wrapper around make_package """
     # Some SQL DBs round the timestamps (looking at you, MySQL >:| ),
     # which is a problem if they round UP into the future, as our
     # calculations depend on the timestamps being monotonically increasing.
     now = utcnow() - timedelta(seconds=1)
     kwargs.setdefault("last_modified", now)
     kwargs.setdefault("factory", SQLPackage)
     return make_package(*args, **kwargs)
 def list_storage(factory):
     """ mocked method for listing storage packages """
     # The first time we list from storage, concurrently "upload"
     # pkgs[2]
     if len(return_values) == 2:
         nowish = utcnow() + timedelta(seconds=1)
         pkg = self._make_package("mypkg3", last_modified=nowish)
         pkgs.append(pkg)
         self.db.save(pkg)
     return return_values.pop(0)()
 def test_add_missing_more_recent(self):
     """ If we sync a more recent package, update the summary """
     pkgs = [
         self._make_package(last_modified=utcnow() - timedelta(hours=1)),
         self._make_package(version="1.5"),
     ]
     self.db.save(pkgs[0])
     self.storage.list.return_value = pkgs
     self.db.reload_from_storage()
     all_pkgs = self.sql.query(SQLPackage).all()
     self.assertItemsEqual(all_pkgs, pkgs)
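
Every snippet in these examples timestamps packages with utcnow(). A minimal sketch of that helper, assuming it simply returns a timezone-aware UTC datetime (pypicloud ships it as pypicloud.dateutil.utcnow):

from datetime import datetime, timezone

def utcnow():
    # Assumed behavior: current time as a timezone-aware UTC datetime
    return datetime.now(timezone.utc)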
Example #5
def make_package(name="mypkg",
                 version="1.1",
                 filename=None,
                 last_modified=None,
                 summary="summary",
                 factory=Package,
                 **kwargs):
    """Convenience method for constructing a package"""
    filename = filename or "%s-%s.tar.gz" % (name, version)
    return factory(name, version, filename, last_modified or utcnow(), summary,
                   **kwargs)
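
A quick usage sketch of the factory above, assuming pypicloud's Package model exposes the constructor arguments as attributes; it relies on the make_package definition shown here:

pkg = make_package()                 # defaults: mypkg 1.1, filename derived from name/version
newer = make_package(version="1.5")  # last_modified falls back to utcnow()

assert pkg.filename == "mypkg-1.1.tar.gz"
assert newer.version == "1.5"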
Example #6
File: s3.py  Project: incrmntal/pypicloud
    def _generate_url(self, package):
        """ Get the fully-qualified CloudFront path for a package """
        path = self.get_path(package)
        url = self.domain + "/" + quote(path)

        # No key id, no signer, so we don't have to sign the URL
        if self.cf_signer is None:
            return url

        # To sign with a canned policy:
        expires = utcnow() + timedelta(seconds=self.expire_after)
        return self.cf_signer.generate_presigned_url(url,
                                                     date_less_than=expires)
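
For context, cf_signer above is a botocore CloudFrontSigner. A sketch of how one could be built, following the standard botocore + cryptography recipe; key_id and private_key_pem are placeholders, and SHA-1/PKCS1v15 is what CloudFront URL signing expects:

from botocore.signers import CloudFrontSigner
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding

def make_cf_signer(key_id, private_key_pem):
    # Hypothetical helper: build a signer from a PEM-encoded RSA private key
    key = serialization.load_pem_private_key(private_key_pem, password=None)

    def rsa_signer(message):
        return key.sign(message, padding.PKCS1v15(), hashes.SHA1())

    return CloudFrontSigner(key_id, rsa_signer)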
 def test_add_missing_more_recent(self):
     """ If we sync a more recent package, update the summary """
     pkgs = [
         make_package(last_modified=utcnow() - timedelta(hours=1)),
         make_package(version="1.5"),
     ]
     self.db.save(pkgs[0])
     self.storage.list.return_value = pkgs
     self.db.reload_from_storage()
     all_pkgs = self.db._load_all_packages()
     self.assertItemsEqual(all_pkgs, pkgs)
     summaries = self.db.summary()
     self.assertEqual(len(summaries), 1)
     summary = summaries[0]
     self.assertEqual(summary["last_modified"].hour, pkgs[1].last_modified.hour)
Example #8
 def test_add_missing_more_recent(self):
     """If we sync a more recent package, update the summary"""
     pkgs = [
         make_package(
             last_modified=utcnow() - timedelta(hours=1),
             factory=DynamoPackage,
         ),
         make_package(version="1.5", factory=DynamoPackage),
     ]
     self.db.save(pkgs[0])
     self.storage.list.return_value = pkgs
     self.db.reload_from_storage()
     all_pkgs = self.engine.scan(DynamoPackage).all()
     self.assertCountEqual(all_pkgs, pkgs)
     summaries = self.db.summary()
     self.assertEqual(len(summaries), 1)
     summary = summaries[0]
     self.assertEqual(summary["last_modified"], pkgs[1].last_modified)
Example #9
    def reload_from_storage(self, clear=True):
        if not self.graceful_reload:
            return super(SQLCache, self).reload_from_storage(clear)

        LOG.info("Rebuilding cache from storage")
        # Log start time
        start = utcnow()
        # Fetch packages from storage s1
        s1 = set(self.storage.list(SQLPackage))
        # Fetch cache packages c1
        c1 = set(self.db.query(SQLPackage).all())
        # Add missing packages to cache (s1 - c1)
        missing = s1 - c1
        if missing:
            LOG.info("Adding %d missing packages to cache", len(missing))
            for pkg in missing:
                self.db.merge(pkg)
        # Delete extra packages from cache (c1 - s1) when last_modified < start
        # The time filter helps us avoid deleting packages that were
        # concurrently uploaded.
        extra1 = [p for p in (c1 - s1) if p.last_modified <= start]
        if extra1:
            LOG.info("Removing %d extra packages from cache", len(extra1))
            for pkg in extra1:
                self.db.query(SQLPackage).filter(
                    SQLPackage.filename == pkg.filename).delete(
                        synchronize_session=False)

        # If any packages were concurrently deleted during the cache rebuild,
        # we can detect them by polling storage again and looking for any
        # packages that were present in s1 and are missing from s2
        s2 = set(self.storage.list(SQLPackage))
        # Delete extra packages from cache (s1 - s2)
        extra2 = s1 - s2
        if extra2:
            LOG.info(
                "Removing %d packages from cache that were concurrently "
                "deleted during rebuild",
                len(extra2),
            )
            for pkg in extra2:
                self.db.query(SQLPackage).filter(
                    SQLPackage.filename == pkg.filename).delete(
                        synchronize_session=False)
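
The set arithmetic behind this graceful reload is easy to check on its own. A small sketch with hypothetical filenames; the Dynamo and Redis variants below use the same reconciliation and additionally subtract extra2 from missing before updating summaries:

# s1: storage at the start, c1: cache at the start, s2: storage re-listed at the end
s1 = {"a-1.0.tar.gz", "b-1.0.tar.gz", "c-1.0.tar.gz"}
c1 = {"b-1.0.tar.gz", "d-1.0.tar.gz"}
s2 = {"a-1.0.tar.gz", "b-1.0.tar.gz"}

missing = s1 - c1   # packages to add to the cache
extra1 = c1 - s1    # cache entries to drop (subject to the last_modified check)
extra2 = s1 - s2    # packages deleted from storage while the rebuild was running

assert missing == {"a-1.0.tar.gz", "c-1.0.tar.gz"}
assert extra1 == {"d-1.0.tar.gz"}
assert extra2 == {"c-1.0.tar.gz"}

missing -= extra2   # don't treat concurrently-deleted files as additions
assert missing == {"a-1.0.tar.gz"}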
Example #10
def handle_s3_event(event, context):
    """Handle S3 object notification"""
    from pypicloud.cache import get_cache_impl
    from pypicloud.dateutil import utcnow
    from pypicloud.storage.s3 import S3Storage
    from pypicloud.util import parse_filename

    settings = json.loads(os.environ["PYPICLOUD_SETTINGS"])
    # Set 'file' storage as a hack. We're going to load the cache, which will
    # load a storage. We won't actually be using the storage for anything, but
    # the settings have to be present.
    settings.setdefault("pypi.storage", "file")
    settings.setdefault("storage.dir", "/tmp")
    cache_impl = get_cache_impl(settings)
    kwargs = cache_impl.configure(settings)
    cache = cache_impl(**kwargs)

    s3 = boto3.resource("s3")
    for record in event["Records"]:
        bucket = record["s3"]["bucket"]["name"]
        key = record["s3"]["object"]["key"]
        event_name = record["eventName"]
        if event_name.startswith("ObjectCreated"):
            print("S3 object %r created" % key)
            obj = s3.Object(bucket, key)
            package = S3Storage.package_from_object(obj, cache.new_package)
            existing_pkg = cache.fetch(package.filename)
            if existing_pkg is None:
                print("Saving package %s" % package)
                cache.save(package)
            else:
                print("Package already cached")
        else:
            print("S3 object %r deleted" % key)
            filename = posixpath.basename(key)
            try:
                name, version = parse_filename(filename)
            except ValueError:
                name = version = "dummy"
            package = cache.new_package(name, version, filename, utcnow(), "")
            print("Deleting package %s" % package)
            cache.clear(package)
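
A sketch of the event shape the handler reads and how it could be invoked; the bucket, key, and cache settings are placeholders, and the ObjectCreated branch calls out to S3, so real AWS credentials would be needed:

import json
import os

# Hypothetical cache settings; the handler fills in its own storage defaults
os.environ["PYPICLOUD_SETTINGS"] = json.dumps({
    "pypi.db": "sql",                         # assumption: SQL cache backend
    "db.url": "sqlite:////tmp/pypicloud.db",  # assumption: placeholder DB URL
})

# Minimal record in the standard S3 notification layout
event = {
    "Records": [
        {
            "eventName": "ObjectCreated:Put",
            "s3": {
                "bucket": {"name": "my-pypi-bucket"},         # placeholder
                "object": {"key": "mypkg/mypkg-1.1.tar.gz"},  # placeholder
            },
        }
    ]
}

handle_s3_event(event, context=None)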
Example #11
    def _generate_url(self, package: Package) -> str:
        path = self.get_path(package)

        url_params = generate_blob_sas(
            account_name=self.storage_account_name,
            container_name=self.storage_container_name,
            blob_name=path,
            account_key=self.storage_account_key,
            permission=BlobSasPermissions(read=True),
            expiry=utcnow() + timedelta(seconds=self.expire_after),
            protocol="https",
        )

        url = "{}/{}/{}?{}".format(
            self.azure_storage_account_url,
            self.storage_container_name,
            path,
            url_params,
        )
        return url
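
The helpers used above come from the azure-storage-blob SDK. A standalone sketch of the same call with the imports spelled out; all account values are placeholders:

from datetime import datetime, timedelta, timezone

from azure.storage.blob import BlobSasPermissions, generate_blob_sas

sas = generate_blob_sas(
    account_name="myaccount",               # placeholder
    container_name="packages",              # placeholder
    blob_name="mypkg/mypkg-1.1.tar.gz",     # placeholder
    account_key="<base64-account-key>",     # placeholder
    permission=BlobSasPermissions(read=True),
    expiry=datetime.now(timezone.utc) + timedelta(hours=1),
    protocol="https",
)
url = "https://myaccount.blob.core.windows.net/packages/mypkg/mypkg-1.1.tar.gz?" + sas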
Example #12
    def reload_from_storage(self, clear=True):
        if not self.graceful_reload:
            return super(DynamoCache, self).reload_from_storage(clear)
        LOG.info("Rebuilding cache from storage")
        # Log start time
        start = utcnow()
        # Fetch packages from storage s1
        s1 = set(self.storage.list(self.new_package))
        # Fetch cache packages c1
        c1 = set(self.engine.scan(DynamoPackage))
        # Add missing packages to cache (s1 - c1)
        missing = s1 - c1
        if missing:
            LOG.info("Adding %d missing packages to cache", len(missing))
            self.engine.save(missing)
        # Delete extra packages from cache (c1 - s1) when last_modified < start
        # The time filter helps us avoid deleting packages that were
        # concurrently uploaded.
        extra1 = [p for p in (c1 - s1) if p.last_modified < start]
        if extra1:
            LOG.info("Removing %d extra packages from cache", len(extra1))
            self.engine.delete(extra1)

        # If any packages were concurrently deleted during the cache rebuild,
        # we can detect them by polling storage again and looking for any
        # packages that were present in s1 and are missing from s2
        s2 = set(self.storage.list(self.new_package))
        # Delete extra packages from cache (s1 - s2)
        extra2 = s1 - s2
        if extra2:
            LOG.info(
                "Removing %d packages from cache that were concurrently "
                "deleted during rebuild",
                len(extra2),
            )
            self.engine.delete(extra2)
            # Remove these concurrently-deleted files from the list of packages
            # that were missing from the cache. Don't want to use those to
            # update the summaries below.
            missing -= extra2

        # Update the PackageSummary for added packages
        packages_by_name = defaultdict(list)
        for package in missing:
            # Set the tz here so we can compare against the PackageSummary
            package.last_modified = package.last_modified.replace(tzinfo=UTC)
            packages_by_name[package.name].append(package)
        summaries = self.engine.get(PackageSummary, packages_by_name.keys())
        summaries_by_name = {}
        for summary in summaries:
            summaries_by_name[summary.name] = summary
        for name, packages in packages_by_name.items():
            if name in summaries_by_name:
                summary = summaries_by_name[name]
            else:
                summary = PackageSummary(packages[0])
                summaries.append(summary)
            for package in packages:
                if package.last_modified > summary.last_modified:
                    summary.last_modified = package.last_modified
                    summary.summary = package.summary
        if summaries:
            LOG.info("Updating %d package summaries", len(summaries))
            self.engine.save(summaries, overwrite=True)

        # Remove the PackageSummary for deleted packages
        removed = set()
        for package in extra1:
            removed.add(package.name)
        for package in extra2:
            removed.add(package.name)
        for name in removed:
            self._maybe_delete_summary(name)
Example #13
    def reload_from_storage(self, clear=True):
        if not self.graceful_reload:
            if clear:
                self.clear_all()
            packages = self.storage.list(self.new_package)
            pipe = self.db.pipeline()
            for pkg in packages:
                self.save(pkg, pipe=pipe)
            pipe.execute()
            return

        LOG.info("Rebuilding cache from storage")
        # Log start time
        start = utcnow()
        # Fetch packages from storage s1
        s1 = set(self.storage.list(self.new_package))
        # Fetch cache packages c1
        c1 = set(self._load_all_packages())

        # Add missing packages to cache (s1 - c1)
        missing = s1 - c1
        if missing:
            LOG.info("Adding %d missing packages to cache", len(missing))
            pipe = self.db.pipeline()
            for package in missing:
                self.save(package, pipe, save_summary=False)
            pipe.execute()

        # Delete extra packages from cache (c1 - s1) when last_modified < start
        # The time filter helps us avoid deleting packages that were
        # concurrently uploaded.
        extra1 = [p for p in (c1 - s1) if p.last_modified < start]
        if extra1:
            LOG.info("Removing %d extra packages from cache", len(extra1))
            pipe = self.db.pipeline()
            for package in extra1:
                self._delete_package(package, pipe)
            pipe.execute()

        # If any packages were concurrently deleted during the cache rebuild,
        # we can detect them by polling storage again and looking for any
        # packages that were present in s1 and are missing from s2
        s2 = set(self.storage.list(self.new_package))
        # Delete extra packages from cache (s1 - s2)
        extra2 = s1 - s2
        if extra2:
            LOG.info(
                "Removing %d packages from cache that were concurrently "
                "deleted during rebuild",
                len(extra2),
            )
            pipe = self.db.pipeline()
            for package in extra2:
                self._delete_package(package, pipe)
            pipe.execute()
            # Remove these concurrently-deleted files from the list of packages
            # that were missing from the cache. Don't want to use those to
            # update the summaries below.
            missing -= extra2

        # Update the summary for added packages
        packages_by_name = defaultdict(list)
        for package in missing:
            package.last_modified = package.last_modified
            packages_by_name[package.name].append(package)

        summaries = self._load_summaries(packages_by_name.keys())
        summaries_by_name = {}
        for summary in summaries:
            summaries_by_name[summary["name"]] = summary
        for name, packages in packages_by_name.items():
            if name in summaries_by_name:
                summary = summaries_by_name[name]
            else:
                summary = summary_from_package(packages[0])
                summaries.append(summary)
            for package in packages:
                if package.last_modified > summary["last_modified"]:
                    summary["last_modified"] = package.last_modified
                    summary["summary"] = package.summary
        if summaries:
            LOG.info("Updating %d package summaries", len(summaries))
            pipe = self.db.pipeline()
            for summary in summaries:
                self._save_summary(summary, pipe)
            pipe.execute()

        # Remove the PackageSummary for deleted packages
        removed = set()
        for package in extra1:
            removed.add(package.name)
        for package in extra2:
            removed.add(package.name)
        if removed:
            pipe = self.db.pipeline()
            for name in removed:
                pipe.scard(self.redis_filename_set(name))
            counts = pipe.execute()
            pipe = self.db.pipeline()
            for name, count in zip(removed, counts):
                if count == 0:
                    self._delete_summary(name, pipe)
            pipe.execute()
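
The final block batches the set-cardinality checks in one Redis pipeline and the deletions in a second. A minimal sketch of that count-then-delete pattern with redis-py; the key names are hypothetical, not pypicloud's actual key scheme:

import redis

r = redis.Redis()
names = ["mypkg", "otherpkg"]  # hypothetical package names

pipe = r.pipeline()
for name in names:
    pipe.scard("filenames:" + name)  # hypothetical key scheme
counts = pipe.execute()

pipe = r.pipeline()
for name, count in zip(names, counts):
    if count == 0:
        pipe.delete("summary:" + name)  # hypothetical key scheme
pipe.execute()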