Example #1
    def test_to_csv_zip(self):

        try:
            # Test to_csv() with a .zip path, which should write a zip archive
            self.tbl.to_csv('myzip.zip')
            tmp = zip_archive.unzip_archive('myzip.zip')
            assert_matching_tables(self.tbl, Table.from_csv(tmp))

            # Test using the to_zip_csv() method
            self.tbl.to_zip_csv('myzip.zip')
            tmp = zip_archive.unzip_archive('myzip.zip')
            assert_matching_tables(self.tbl, Table.from_csv(tmp))
        finally:
            os.unlink('myzip.zip')
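
For reference, a minimal standalone sketch of the round trip this test exercises, assuming the import paths `parsons.Table` and `parsons.utilities.zip_archive` (the sample row below is hypothetical; the test itself runs inside a class where `self.tbl` is already populated):

import os

from parsons import Table
from parsons.utilities import zip_archive

tbl = Table([{'first': 'Ada', 'last': 'Lovelace'}])  # hypothetical sample row

try:
    # Write the table to a zip archive, then read it back out
    tbl.to_zip_csv('people.zip')
    unzipped_csv = zip_archive.unzip_archive('people.zip')
    print(Table.from_csv(unzipped_csv).num_rows)  # -> 1
finally:
    os.unlink('people.zip')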
Example #2
    def from_s3_csv(cls,
                    bucket,
                    key,
                    from_manifest=False,
                    aws_access_key_id=None,
                    aws_secret_access_key=None,
                    **csvargs):
        """
        Create a ``parsons table`` from a key in an S3 bucket.

        `Args:`
            bucket: str
                The S3 bucket.
            key: str
                The S3 key.
            from_manifest: bool
                If True, treats `key` as a manifest file and loads all of its URLs
                into a `parsons.Table`. Defaults to False.
            aws_access_key_id: str
                Required if not included as an environment variable.
            aws_secret_access_key: str
                Required if not included as an environment variable.
            \**csvargs: kwargs
                ``csv_reader`` optional arguments
        `Returns:`
            `parsons.Table` object
        """  # noqa: W605

        from parsons.aws import S3
        s3 = S3(aws_access_key_id, aws_secret_access_key)

        if from_manifest:
            with open(s3.get_file(bucket, key)) as fd:
                manifest = json.load(fd)

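            # Each manifest entry is expected to carry a full S3 url, e.g.
            # {"entries": [{"url": "s3://bucket/key.csv"}, ...]} (hypothetical values)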
            s3_keys = [x["url"] for x in manifest["entries"]]

        else:
            s3_keys = [f"s3://{bucket}/{key}"]

        tbls = []
        for s3_key in s3_keys:
            # TODO handle urls that end with '/', i.e. urls that point to "folders"
            # "s3://bucket/key" splits into ('s3:', '', bucket, key)
            _, _, bucket_, key_ = s3_key.split("/", 3)
            file_ = s3.get_file(bucket_, key_)
            if files.compression_type_for_path(key_) == 'zip':
                file_ = zip_archive.unzip_archive(file_)

            tbls.append(petl.fromcsv(file_, **csvargs))

        return cls(petl.cat(*tbls))
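
A hedged usage sketch of the classmethod above, assuming it is exposed on `parsons.Table` and that AWS credentials are set as environment variables; the bucket and key names are hypothetical placeholders. (The method body also relies on module-level imports of `json`, `petl`, and Parsons' `files` and `zip_archive` utilities, which this excerpt omits.)

from parsons import Table

# Load a single CSV object from S3
tbl = Table.from_s3_csv('my-bucket', 'exports/people.csv')

# Load every url listed in a manifest file into one concatenated Table
combined = Table.from_s3_csv('my-bucket', 'exports/manifest', from_manifest=True)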