示例#1
0
class ArchiveStorage:
    """Context-manager wrapper around one blob container with Archive settings.

    The instance builds a ``StorageClient`` configured with ``tier='Archive'``,
    keeps the container handle returned by ``get_container()`` and reuses the
    client's content settings for every upload.
    """

    def __init__(self, container):
        """Create the storage client and bind its container handle.

        Args:
            container: Forwarded to ``StorageClient`` as its ``container``
                keyword argument.
        """
        self.client = StorageClient(
            container=container,
            content_type="application/octet-stream",
            cache_control="no-cache, max-age=0, must-revalidate",
            compressed=True,
            tier='Archive',
        )
        self.container = self.client.get_container()
        # The client keeps its content settings on a non-public attribute;
        # they are reused as-is for each upload.
        self.content_settings = self.client._content_settings

    def upload(self, path: str, data: Union[Iterable[AnyStr], IO[AnyStr]]):
        """Upload ``data`` under the blob name ``path``.

        An existing blob with the same name is overwritten and the blob is
        written with ``StandardBlobTier.Archive``.

        Args:
            path: Blob name to write to.
            data: Payload handed to ``upload_blob`` unchanged.
        """
        self.container.upload_blob(
            data=data,
            name=path,
            content_settings=self.content_settings,
            overwrite=True,
            standard_blob_tier=StandardBlobTier.Archive,
            timeout=60,
            max_concurrency=10,
        )

    def download(self, path: str) -> StorageStreamDownloader:
        """Return the container's ``download_blob`` result for ``path``."""
        downloader = self.container.download_blob(path)
        return downloader

    def ls_of(self, prefix):
        """Return the container's ``walk_blobs`` result for names starting with ``prefix``."""
        listing = self.container.walk_blobs(name_starts_with=prefix)
        return listing

    def __enter__(self) -> 'ArchiveStorage':
        """Enter the ``with`` block, yielding this instance."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the container handle when the ``with`` block ends.

        Nothing is returned, so an exception raised inside the block
        continues to propagate.
        """
        self.container.close()
def store_data():
    """Build an uploader that stores DataFrames as Feather blobs.

    A ``StorageClient`` for the ``"pipeline"`` container is created once
    (``tier='Cool'``, ``compressed=False``); the returned closure reuses that
    container handle and the client's content settings on every call.

    Returns:
        The ``upload(data, category, subcategory, date)`` function.
    """
    client = StorageClient(
        container="pipeline",
        content_type="application/octet-stream",
        cache_control="no-cache, max-age=0, must-revalidate",
        compressed=False,
        tier='Cool',
    )
    container = client.get_container()
    content_settings = client._content_settings

    def upload(data: DataFrame, category: str, subcategory: Union[str, None],
               date: str):
        """Serialise ``data`` to Feather and upload it to the container.

        Args:
            data: Frame to store. Its first row supplies ``areaType`` and
                ``areaCode`` for the blob name, so it must have at least one
                row and the ``areaType``, ``areaCode`` and ``date`` columns.
            category: First path component after ``etl/``.
            subcategory: Optional second path component; omitted from the
                path when falsy.
            date: Path component placed before the file name.

        Returns:
            A dict with the blob ``path`` plus the ``area_type``,
            ``area_code``, ``category``, ``subcategory`` and ``date`` used.
        """
        first_row = data.iloc[0]
        area_type = first_row.areaType
        area_code = first_row.areaCode

        path = (
            f"etl/{category}/{subcategory}/{date}/{area_type}_{area_code}.ft"
            if subcategory
            else f"etl/{category}/{date}/{area_type}_{area_code}.ft"
        )

        with TemporaryFile() as fp:
            # Order rows by area, newest date first, and drop columns that
            # hold no values at all before writing.
            ordered = data.sort_values(
                ["areaType", "areaCode", "date"],
                ascending=[True, True, False],
            )
            trimmed = ordered.dropna(how='all', axis=1)
            trimmed.reset_index(drop=True).to_feather(fp)

            # Rewind so the upload reads the file from its beginning.
            fp.seek(0)

            container.upload_blob(
                data=fp,
                name=path,
                content_settings=content_settings,
                overwrite=True,
                standard_blob_tier=StandardBlobTier.Cool,
                timeout=60,
                max_concurrency=10,
            )

        return {
            "path": path,
            "area_type": area_type,
            "area_code": area_code,
            "category": category,
            "subcategory": subcategory,
            "date": date,
        }

    return upload