Example No. 1
def skip_blob(blob_name, bucket_name="project_vaxx"):
    # Mark a blob as "skipped" by renaming it with a "yyy-" prefix.
    bucket_t = storage_client.bucket(bucket_name, user_project=None)
    blob = bucket_t.blob(blob_name)
    name = "yyy-" + blob_name
    blob = bucket_t.rename_blob(blob, name)
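
This snippet (like Example No. 30 below) assumes a module-level google-cloud-storage client. A minimal setup and call might look like the following sketch; the blob name is a placeholder.

from google.cloud import storage

# Module-level client assumed by skip_blob above.
storage_client = storage.Client()

# Renames "reports/latest.csv" in the default bucket to "yyy-reports/latest.csv".
skip_blob("reports/latest.csv")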
Example No. 2
    def test_copy_existing_file_with_user_project(self):
        new_bucket_name = 'copy-w-requester-pays' + unique_resource_id('-')
        created = Config.CLIENT.create_bucket(new_bucket_name,
                                              requester_pays=True)
        self.case_buckets_to_delete.append(new_bucket_name)
        self.assertEqual(created.name, new_bucket_name)
        self.assertTrue(created.requester_pays)

        to_delete = []
        blob = storage.Blob('simple', bucket=created)
        blob.upload_from_string(b'DEADBEEF')
        to_delete.append(blob)
        try:
            with_user_project = Config.CLIENT.bucket(new_bucket_name,
                                                     user_project=USER_PROJECT)

            new_blob = retry_bad_copy(with_user_project.copy_blob)(
                blob, with_user_project, 'simple-copy')
            to_delete.append(new_blob)

            base_contents = blob.download_as_string()
            copied_contents = new_blob.download_as_string()
            self.assertEqual(base_contents, copied_contents)
        finally:
            for blob in to_delete:
                retry_429(blob.delete)()
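
The user_project handle is what routes requester-pays charges to the caller's project. A minimal sketch of the same pattern outside the test harness, with placeholder bucket and project names:

from google.cloud import storage

client = storage.Client()
# For a requester-pays bucket, user_project names the project billed for the request.
bucket = client.bucket("example-requester-pays-bucket",
                       user_project="my-billing-project")
print(storage.Blob("simple", bucket=bucket).exists(client))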
Example No. 3
    def test_copy(self, mock_service, mock_bucket):
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        destination_bucket_instance = mock_bucket
        source_blob = mock_bucket.blob(source_object)
        destination_blob = storage.Blob(bucket=destination_bucket_instance,
                                        name=destination_object)

        # Given
        bucket_mock = mock_service.return_value.bucket
        bucket_mock.return_value = mock_bucket
        copy_method = bucket_mock.return_value.copy_blob
        copy_method.return_value = destination_blob

        # When
        response = self.gcs_hook.copy(  # pylint:disable=assignment-from-no-return
            source_bucket=source_bucket,
            source_object=source_object,
            destination_bucket=destination_bucket,
            destination_object=destination_object)

        # Then
        self.assertIsNone(response)
        copy_method.assert_called_once_with(
            blob=source_blob,
            destination_bucket=destination_bucket_instance,
            new_name=destination_object)
Example No. 4
def get_ondemand_bot_log(intended_user, *, user_id):
    if user_id != intended_user:
        raise web_util.user_mismatch_error(
            message="Cannot get bot log for another user.")

    bucket = model.get_ondemand_replay_bucket()
    blob = gcloud_storage.Blob("ondemand_bot_log_{}".format(user_id),
                               bucket,
                               chunk_size=262144)
    buffer = io.BytesIO()

    try:
        blob.download_to_file(buffer)
    except gcloud_exceptions.NotFound:
        raise util.APIError(404, message="Error log not found.")

    buffer.seek(0)
    response = web_util.no_cache(
        flask.make_response(
            flask.send_file(buffer,
                            mimetype="text/plain",
                            as_attachment=True,
                            attachment_filename="{}.log".format(user_id))))

    response.headers["Content-Length"] = str(buffer.getbuffer().nbytes)

    return response
Example No. 5
def gcs_upload(local_path, gcs_path, project_id=None, force=False):
  bucket_path, filename = os.path.split(gcs_path)
  bucket_name = os.path.basename(bucket_path)
  
  if project_id is None:
    client = GcsClient.client
  else:
    client = storage.Client( project=project_id )

  try:
    result = gsutil_ls(bucket_name, filter=filename, project_id=project_id)
    # result = __shell__("gsutil ls {}".format(BUCKET_PATH, split=False))
    if "BucketNotFoundException" in result: 
      raise ValueError( "ERROR: bucket not found, path={}".format(bucket_name))
    if result and not force:
      raise Warning("WARNING: gcs file already exists, use force=True. bucket={}".format(bucket_name))

    # client = storage.Client( project=project_id )
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(filename, bucket)
    print("uploading file={} ...".format(gcs_path))
    blob.upload_from_filename(local_path)
    return gcs_path

  except exceptions.NotFound:
    raise ValueError("BucketNotFoundException: GCS bucket not found, path={}".format(bucket_path))
  except Exception as e:
    print(e)
Example No. 6
 def file_exists(self, filename):
     """
     Check if 'filename' file exists within bucket
     :param filename:
     :return: (Bool)
     """
     return storage.Blob(filename, self._bucket).exists(self._gcsclient)
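
The wrapper above delegates to Blob.exists(), which issues a metadata request and returns a boolean without downloading any data. A standalone sketch with an explicit client; the bucket and object names are placeholders:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-example-bucket")
found = storage.Blob("path/to/object.txt", bucket).exists(client)
print(found)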
Example No. 7
def gcs_download(gcs_path, local_path, project_id=None, force=False):
    bucket_path, filename = os.path.split(gcs_path)
    bucket_name = os.path.basename(bucket_path)
    if os.path.isfile(local_path) and not force:
        raise Warning(
            "WARNING: local file already exists, use force=True. path={}".
            format(local_path))

    if project_id is None:
        client = GcsClient.client
    else:
        client = storage.Client(project=project_id)

    try:
        # client = storage.Client( project=project_id )
        bucket = client.get_bucket(bucket_name)
        blob = storage.Blob(filename, bucket)
        print("downloading file={} ...".format(gcs_path))
        blob.download_to_filename(local_path)
        return local_path

    except exceptions.NotFound:
        raise ValueError(
            "BucketNotFoundException: GCS bucket not found, path={}".format(
                bucket_path))
    except Exception as e:
        print(e)
Example No. 8
 def blobExists(self, location):
     '''
     Check if file is located in GCP bucket
     :param location: string, the blob location string
     :return: boolean
     '''
     return storage.Blob(bucket=self.BUCKET, name=location).exists()
Example No. 9
def write_config_to_bucket(activity_json):
    #if WRITE_EPOCH is True:
    if len(activity_json) > 0:
        logger.log_text(
            f"{FUNCTION_NAME}: Updating configuration with epoch value of the current time: {datetime.datetime.fromtimestamp(int(CALLED_EPOCH)).strftime('%Y-%m-%d %H:%M:%S')}"
        )
        CONFIGURATION.set('strava_client', 'strava_current_epoch',
                          f'{CALLED_EPOCH}')
    else:
        logger.log_text(
            f'{FUNCTION_NAME}: No activities returned; so not updating the epoch'
        )

    GCS_BUCKET = os.environ.get(KEY_EV_GCS_BUCKET)
    CONFIG_FILE = os.environ.get(KEY_EV_CONFIG_FILE)
    localconfig = StringIO()
    CONFIGURATION.write(localconfig)
    localconfig.seek(0)
    logger.log_text(
        f'{FUNCTION_NAME}: Writing {CONFIG_FILE} to bucket: {localconfig.read()}'
    )
    try:
        client = storage.Client()
        bucket = client.get_bucket(GCS_BUCKET)
        blob = storage.Blob(CONFIG_FILE, bucket)
        localconfig.seek(0)
        blob.upload_from_file(localconfig)
    except Exception:
        logger.log_text(
            f'{FUNCTION_NAME}: Error while transacting with GCS: {sys.exc_info()}'
        )
        raise RuntimeError(
            f'Error while transacting with GCS: {sys.exc_info()}')
    localconfig.close()
Example No. 10
 def check_schema_stg(self, tag):
     # Get schemas bucket from other project
     external_credentials = self.gcp_helper.request_auth_token()
     storage_client_external = storage.Client(
         credentials=external_credentials)
     storage_bucket = storage_client_external.get_bucket(
         config.SCHEMAS_BUCKET)
     # Get schema name from tag
     tag = tag.replace('/', '_')
     if not tag.endswith(".json"):
         tag = tag + ".json"
     blob_name = tag
     # Check if schema is in schema storage
     if storage.Blob(bucket=storage_bucket,
                     name=blob_name).exists(storage_client_external):
         # Get blob
         blob = storage_bucket.get_blob(blob_name)
         if blob:
             # Convert to string
             blob_json_string = blob.download_as_string()
             # Convert to json
             blob_json = json.loads(blob_json_string)
             # return blob in json format
             return blob_json
     return None
Example No. 11
def _lock_down_bucket(spinner, cloud_logger, bucket, lock_file_name,
                      service_account_email):
    """Change the ACL/IAM on the bucket so that only the service account can access it.

    Args:
        spinner: The spinner displayed in the console
        cloud_logger: A GCP logging client instance
        bucket: The bucket object to lock down
        lock_file_name: The name of the lock file
        service_account_email: The email of the service account
    """

    if storage.Blob(lock_file_name, bucket).exists():
        spinner.fail('X')
        msg = 'The lock file exists in the source bucket, so we cannot continue'
        cloud_logger.log_text(msg)
        raise SystemExit(msg)

    spinner.ok(_CHECKMARK)
    msg = 'Locking down the bucket by revoking all ACLs/IAM policies'
    spinner.text = msg
    cloud_logger.log_text(msg)

    # Turn off any bucket ACLs
    bucket.acl.save_predefined('private')

    # Revoke all IAM access and only set the service account as an admin
    policy = api_core_iam.Policy()
    policy['roles/storage.admin'].add('serviceAccount:' +
                                      service_account_email)
    bucket.set_iam_policy(policy)
Example No. 12
 def read_block(self, offset):
     try:
         blob = storage.Blob(str(offset), self.gcp_bucket)
         block = blob.download_as_string()
         return bytearray(block)
     except NotFound:
         return bytearray([])
Example No. 13
def check_bucket(file, bucketID):
    """
	checks bucket for file and returns bool
	"""
    bucket = storage_client.bucket(bucketID)
    check = storage.Blob(bucket=bucket, name=file).exists(storage_client)
    return check
Example No. 14
def convert():
    blobs = bucket.list_blobs()
    counter = 0
    for blob in blobs:
        just_name = os.path.splitext(blob.name)[0]
        if just_name.startswith("ogg/") or just_name.startswith("wav/"):
            continue
        counter += 1
        if is_converted(blob):
            print("++++Already converted. Deleting.")
            blob.delete()
            print("++++Deleted")
            continue
        print(blob.name)
        ogg_file_name = './ogg/' + blob.name
        blob.download_to_filename(ogg_file_name)
        print("++++Downloaded")
        wav_file_name = './wav/' + just_name + ".wav"
        blob_wav_name = 'wav/' + just_name + ".wav"
        try:
            call(["ffmpeg", "-i", ogg_file_name, wav_file_name])
            print("++++Converted")
            blob_wav = storage.Blob(blob_wav_name, bucket)
            blob_wav.upload_from_filename(wav_file_name)
            print("++++Uploaded")
            bucket.copy_blob(blob, bucket, "ogg/" + blob.name)
            print("++++Copied")
        except IOError:
            continue
        if is_converted(blob):
            print("++++Successfully converted. Deleting.")
            blob.delete()
            print("++++Deleted")
    print(counter)
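
The is_converted helper used above is not shown in this example. One plausible sketch, assuming it merely checks whether the corresponding wav/ object already exists in the same bucket (a hypothetical reconstruction, not the original source):

def is_converted(blob):
    # Hypothetical helper: treat the ogg blob as converted if a matching
    # "wav/<name>.wav" object already exists in its bucket.
    just_name = os.path.splitext(blob.name)[0]
    return storage.Blob('wav/' + just_name + ".wav", blob.bucket).exists()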
Example No. 15
 def _local_to_gs(self, configs):
     self._logger.debug('Starting local to gs...')
     start_timestamp = datetime.now()
     for config in configs:
         self.delete_in_gs(data_name=config.data_name)
     for config in configs:
         if not self.exist_in_local(data_name=config.data_name):
             raise ValueError('There is no data named {} in local'.format(
                 config.data_name))
         for local_file_path in self.list_local_file_paths(
                 data_name=config.data_name):
             basename = os.path.basename(local_file_path)
             if self._gs_dir_path_in_bucket is None:
                 name = basename
             else:
                 name = self._gs_dir_path_in_bucket + '/' + basename
             blob = storage.Blob(name=name,
                                 bucket=self._bucket,
                                 chunk_size=self._chunk_size)
             blob.upload_from_filename(filename=local_file_path)
     for config in configs:
         if config.delete_in_source:
             self.delete_in_local(data_name=config.data_name)
     end_timestamp = datetime.now()
     duration = (end_timestamp - start_timestamp).seconds
     self._logger.debug('Ended local to gs [{}s]'.format(duration))
     return duration
Example No. 16
def read_config_from_bucket():
    global CONFIGURATION
    GCS_BUCKET = os.environ.get(KEY_EV_GCS_BUCKET)
    logger.log_text(
        f'{FUNCTION_NAME}: Environment variable GCS_BUCKET: {GCS_BUCKET}')
    CONFIG_FILE = os.environ.get(KEY_EV_CONFIG_FILE)
    logger.log_text(
        f'{FUNCTION_NAME}: Environment variable CONFIG_FILE: {CONFIG_FILE}')
    if GCS_BUCKET is None or CONFIG_FILE is None:
        logger.log_text(
            f'{FUNCTION_NAME}: Expected environment variables are missing; throwing RuntimeError'
        )
        raise RuntimeError('Expected environment variables are missing')
    try:
        client = storage.Client()
        bucket = client.get_bucket(GCS_BUCKET)
        blob = storage.Blob(CONFIG_FILE, bucket)
        localconfig = BytesIO()
        client.download_blob_to_file(blob, localconfig)
    except Exception:
        logger.log_text(
            f'{FUNCTION_NAME}: Error while transacting with GCS: {sys.exc_info()}'
        )
        raise RuntimeError(
            f'Error while transacting with GCS: {sys.exc_info()}')
    localconfig.seek(0)
    logger.log_text(
        f'{FUNCTION_NAME}: Read {CONFIG_FILE} from bucket: {localconfig.read().decode("utf-8")}'
    )
    localconfig.seek(0)
    CONFIGURATION.read_string(localconfig.read().decode('utf-8'))
    localconfig.close()
Example No. 17
def get_match_replay(intended_user, match_id):
    with model.engine.connect() as conn:
        match = conn.execute(
            sqlalchemy.sql.select([
                model.games.c.replay_name,
                model.games.c.replay_bucket,
            ]).where(model.games.c.id == match_id)).first()

        if not match:
            raise util.APIError(404, message="Match not found.")

        bucket = model.get_replay_bucket(match["replay_bucket"])
        blob = gcloud_storage.Blob(match["replay_name"],
                                   bucket,
                                   chunk_size=262144)
        buffer = io.BytesIO()
        blob.download_to_file(buffer)
        buffer.seek(0)
        response = flask.make_response(
            flask.send_file(buffer,
                            mimetype="application/x-halite-2-replay",
                            as_attachment=True,
                            attachment_filename=str(match_id) + ".hlt"))

        response.headers["Content-Length"] = str(buffer.getbuffer().nbytes)

        return response
Example No. 18
def calc(btc_history, btc_score, btc_prediction_file):
    client = gcs.Client(project_name)

    blob = gcs.Blob(btc_prediction_file, client.get_bucket(bucket_name))
    content = blob.download_as_string()
    btc_prediction = json.loads(content)

    # calc
    today = (datetime.date.today() -
             datetime.timedelta(days=2)).strftime("%Y-%m-%d")
    latest_actual = btc_history['bpi'][today]
    latest_prediction = btc_prediction['prediction'][-1]
    before_prediction = btc_prediction['prediction'][-2]

    if latest_actual > before_prediction:
        a = before_prediction
        b = latest_actual
    else:
        a = latest_actual
        b = before_prediction

    r = a / b

    xs = btc_score
    return [
        xs[0] + (0 if 0.95 <= r and r > 0.98 else 1),
        xs[1] + (0 if 0.98 <= r and r > 0.99 else 1),
        xs[2] + (0 if r <= 0.99 else 1), latest_prediction
    ]
Example No. 19
def download_from_cloud():
    client = storage.Client()
    bucket = client.get_bucket('spk_bucket1')
    blob = storage.Blob('asd/c1.txt', bucket)

    with open('/home/sujithpk/Desktop/d.csv', 'wb') as file_obj:
        blob.download_to_file(file_obj)
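
The same download can be written without managing the file handle by using Blob.download_to_filename, which opens, writes, and closes the local file itself. A sketch reusing the bucket and object above; the local path is a placeholder:

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket('spk_bucket1')
bucket.blob('asd/c1.txt').download_to_filename('/tmp/d.csv')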
Example No. 20
def get_exported_table_df(table_name):
    """Retrieve exported table file on GCS.

    Args:
        table_name (string): Name of the table to load.

    Returns:
        pandas.DataFrame

    """

    bucket = storage\
        .Client(get_config('gcp_project_name'))\
        .get_bucket(get_config('gcs_bucket_name'))
    key = \
        '{experiment_name}/exported_tables/{table_name}/' \
        '{date_descriptor}/out.csv.gzip'.format(
            experiment_name=get_config('experiment_name'),
            table_name=table_name,
            date_descriptor='{{ ds_nodash }}')
    blob = storage.Blob(key, bucket)
    bio = io.BytesIO()
    blob.download_to_file(bio)
    bio.seek(0)

    return pd.read_csv(bio, compression='gzip')
Example No. 21
def push_to_storage(file_path, bucket_name, blob_name=''):
    max_retry = 3
    fails = 0

    chunk_size = 1 << 23
    bck, err = get_gcs_bucket(bucket_name)

    if err:
        return ('', file_path, err)

    if blob_name:
        blob_name = '/'.join([blob_name, basename(file_path)])

    gs.blob._MAX_MULTIPART_SIZE = chunk_size
    blob = gs.Blob(blob_name, bck, chunk_size)

    while blob and fails < max_retry:
        try:
            with open(file_path, 'rb') as f:
                blob.upload_from_file(f)
        except Exception as e:
            err = str(e)
            fails += 1
        else:
            err = ''
            break

    blob_path = 'gs://{}/{}'.format(bck.name, blob.name) \
        if fails < max_retry else ''

    return (blob_path, file_path, err)
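
A usage sketch for the helper above, assuming get_gcs_bucket can resolve the bucket; the local file path, bucket name, and prefix are placeholders:

blob_path, local_path, err = push_to_storage(
    "/tmp/build/artifact.tar.gz",    # local file to upload
    "my-example-bucket",             # destination bucket
    blob_name="builds/2021-01-01")   # optional prefix; the basename is appended
if err:
    print("upload failed after retries:", err)
else:
    print("uploaded to", blob_path)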
Example No. 22
def download_source_blob():
    """Retrieve the worker blob from object storage."""

    cached_blob = cache.get(config.WORKER_ARTIFACT_KEY)
    if cached_blob is None:
        print("Getting from GCloud", config.WORKER_ARTIFACT_KEY)
        # Retrieve from GCloud
        try:
            gcloud_blob = gcloud_storage.Blob(
                config.WORKER_ARTIFACT_KEY,
                model.get_deployed_artifacts_bucket(),
                chunk_size=262144)
            cached_blob = gcloud_blob.download_as_string()
            cache.set(config.WORKER_ARTIFACT_KEY, cached_blob)
        except gcloud_exceptions.NotFound:
            raise util.APIError(404, message="Worker blob not found.")

    if cached_blob is None:
        raise util.APIError(404, message="Worker blob not found.")

    print("Building buffer")
    buffer = io.BytesIO()
    buffer.write(cached_blob)
    buffer.seek(0)
    return flask.send_file(buffer, mimetype="application/gzip",
                           as_attachment=True,
                           attachment_filename="Halite.tgz")
Example No. 23
def new_hires(data):

    df = data[data['DEPARTMENT_TITLE'] == 'CITY ATTORNEY']
    cols = ['RECORD_NBR', 'JOB_CLASS_PGRADE', 'PAY_YEAR', 'GENDER', 'ETHNICITY', 'JOB_TITLE', 'JOB_STATUS', 'EMPLOYMENT_TYPE']

    df2 = df.groupby(cols)['REGULAR_PAY'].sum().reset_index()
    df2['JOB_CLASS_PGRADE_NUMERIC'] = df2['JOB_CLASS_PGRADE'].rank(method='dense', ascending=True).astype(int)
    df2['JOB_CLASS_PGRADE_RANK'] = df2.groupby('RECORD_NBR')['JOB_CLASS_PGRADE_NUMERIC'].rank('dense').astype(int)
    df2['NEW_HIRE'] = df2.groupby('RECORD_NBR')['PAY_YEAR'].rank('dense').astype(int)

    df2['ETHNICITY'] = df2['ETHNICITY'].str.strip().replace(ETHNICITY_MAPPING)

    df2['GROUPING_01'] = df2['JOB_TITLE'].str.strip().replace(JOB_TITLE_MAPPING_01)
    df2['GROUPING_01'] = df2['GROUPING_01'].str.strip()

    df2['GROUPING_02'] = df2['JOB_TITLE'].str.strip().replace(JOB_TITLE_MAPPING_02)
    df2['GROUPING_02'] = df2['GROUPING_02'].str.strip()

    # drop 1st year
    new_hires = df2[(df2['PAY_YEAR'] != 2013) & (df2['NEW_HIRE'] == 1)]
    # drop duplicates
    new_hires = new_hires.drop_duplicates(subset='RECORD_NBR', keep='first')
    new_hires['PAY_YEAR'] = new_hires['PAY_YEAR'].apply(lambda x: f'{x}0101')

    bucket_name = os.getenv('GA_BUCKET')
    fname = 'new_hires.csv'
    ga_bucket_path = f'data/{fname}'

    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(ga_bucket_path, bucket)
    df_str = new_hires.to_csv(index=False, encoding='utf-8')
    blob.upload_from_string(df_str)

    print("uploaded new_hires.csv")
Example No. 24
    def download_blob(self, bucket_name, blob_name, local_path=None):
        """
        Gets a blob from a bucket

        `Args:`
            bucket_name: str
                The name of the bucket
            blob_name: str
                The name of the blob
            local_path: str
                The local path where the file will be downloaded. If not specified, a temporary
                file will be created and returned, and that file will be removed automatically
                when the script is done running.
        `Returns:`
            str
                The path of the downloaded file
        """

        if not local_path:
            local_path = files.create_temp_file_for_path('TEMPTHING')

        bucket = storage.Bucket(self.client, name=bucket_name)
        blob = storage.Blob(blob_name, bucket)

        logger.info(f'Downloading {blob_name} from {bucket_name} bucket.')
        with open(local_path, 'wb') as f:
            blob.download_to_file(f, client=self.client)
        logger.info(f'{blob_name} saved to {local_path}.')

        return local_path
Example No. 25
    def run(
        self,
        bucket_name: str = None,
        blob: str = None,
        project: str = None,
        wait_seconds: int = 0,
        fail_if_not_found: bool = True,
        credentials: dict = None,
        request_timeout: Union[float, Tuple[float, float]] = 60,
    ) -> bool:
        """
        Run method for this Task. Invoked by _calling_ this Task after initialization
        within a Flow context.

        Note that some arguments are required for the task to run, and must be
        provided _either_ at initialization _or_ as arguments.

        Args:
            - bucket_name (str, optional): the bucket to check
            - blob (str, optional): object for which to search within the bucket
            - project (str, optional): default Google Cloud project to work within.
                If not provided, will be inferred from your Google Cloud credentials
            - wait_seconds(int, optional): retry until file is found or until wait_seconds,
                whichever is first.  Defaults to 0
            - fail_if_not_found (bool, optional):  Will raise Fail signal on task if
                blob is not found.  Defaults to True
            - credentials (dict, optional): a JSON document containing Google Cloud credentials.
                You should provide these at runtime with an upstream Secret task.  If not
                provided, Prefect will first check `context` for `GCP_CREDENTIALS` and lastly
                will use default Google client logic.
            - request_timeout (Union[float, Tuple[float, float]], optional): the number of
                seconds the transport should wait for the server response.
                Can also be passed as a tuple (connect_timeout, read_timeout).

        Returns:
            - bool: the object exists

        Raises:
            - ValueError: if `bucket_name` or `blob` are missing
            - FAIL: if object not found and fail_if_not_found is True

        """
        if None in [bucket_name, blob]:
            raise ValueError("Missing bucket_name or blob")

        # create client
        client = get_storage_client(project=project, credentials=credentials)

        bucket = client.bucket(bucket_name)
        blob_exists = None

        wait, n = 0, 1
        while wait <= wait_seconds and not blob_exists:
            sleep(n)
            wait += n
            n *= 2
            blob_exists = storage.Blob(bucket=bucket, name=blob).exists(client)
        if fail_if_not_found and not blob_exists:
            raise FAIL(message="Blob not found")
        return blob_exists
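
The task class is not named in this snippet; assuming it corresponds to a GCS blob-existence task along the lines of Prefect's GCSBlobExists, a usage sketch inside a flow might be (class path and names are assumptions):

from prefect import Flow
from prefect.tasks.gcp.storage import GCSBlobExists  # assumed task class and import path

with Flow("wait-for-gcs-object") as flow:
    # Retry for up to ~60 seconds, then fail the task if the object never appears.
    exists = GCSBlobExists()(
        bucket_name="my-example-bucket",
        blob="exports/data.csv",
        wait_seconds=60,
        fail_if_not_found=True,
    )

flow.run()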
Example No. 26
 def file_exist_gs(self, gsPath):
     """
     Function to know if a file is contained in a GCS bucket
     Input :
         gsPath : full path file
     Output :
         stat : True if present, False otherwise
     """
     try:
         bucketName, fileName = self.extractBucketFile(gsPath)
         bucket = self.get_bucket(bucketName)
         stat = storage.Blob(bucket=bucket, name=fileName).exists(self)
         self.logger.log_text(
             text=
             f"File {fileName} available in bucket {bucketName} : {stat}",
             severity="INFO",
             labels=self.labels,
         )
         return stat
     except Exception as e:
         self.logger.log_text(
             text=f"Error in checking file {fileName} in GS : {e}",
             severity="ERROR",
             labels=self.labels,
         )
         return False
Example No. 27
    def execute(self):
        series = []
        for fname in self.reg.files():
            with open(fname) as fp:
                data = json.loads(fp.read())
            series.append(data)
        series.sort(key=lambda x: x['refdate'])

        logging.debug('generating series file with %s rows', len(series))

        def dump_n_convert(data):
            s = json.dumps(data) + '\n'
            return s.encode('utf_8')

        temp = tempfile.TemporaryFile('wb+')
        lines = [dump_n_convert(data) for data in series]
        temp.writelines(lines)
        temp.seek(0)

        logging.debug('uploading series file to %s',
                      self.get_output_blob_name())
        series_blob = storage.Blob(self.get_output_blob_name(),
                                   self.input_bucket)
        series_blob.upload_from_file(temp, client=self.client)
        self.reg.cleanup()
Example No. 28
def get_last(channel_id):
    read_storage_client = storage.Client()
    bucket_name = 'airqo-bucket'
    filename = 'channel%s.json'%channel_id
    
    bucket = read_storage_client.get_bucket(bucket_name)
    stats = storage.Blob(bucket=bucket, name=filename).exists(read_storage_client)
    #size= storage.get_blob(bucket=bucket, name=filename).chunksize
    if not stats:
        last_id = 0
        last_time = None
    else:
        blob = bucket.get_blob(filename)
        json_data_string = blob.download_as_string()
        json_data=ndjson.loads(json_data_string)
        json_list = []
        for item in json_data:
            json_list.append(item)
          
        if len(json_list) != 0:
            last_id = json_list[-1]['entry_id']
            last_time = str_to_date(json_list[-1]['created_at'])
        else:
            last_id= None
            last_time=None
    
    return last_id,last_time
Example No. 29
def ingest_last_week():
    try:
        # verify that this is a cron job request
        is_cron = flask.request.headers['X-Appengine-Cron']
        logging.info('Received cron request {}'.format(is_cron))

        # create png
        url = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv'
        outfile = 'earthquakes.png'
        status = 'scheduled ingest of {} to {}'.format(url, outfile)
        logging.info(status)
        transform.create_png(url, outfile)

        # upload to cloud storage
        client = gcs.Client()
        bucket = client.get_bucket(CLOUD_STORAGE_BUCKET)
        blob = gcs.Blob('earthquakes/earthquakes.png', bucket)
        blob.upload_from_filename(outfile)

        # change permissions
        blob.make_public()
        status = 'uploaded {} to {}'.format(outfile, blob.name)
        logging.info(status)

    except KeyError as e:
        status = '<html>Sorry, this capability is accessible only by the Cron service, but I got a KeyError for {} -- try invoking it from <a href="{}"> the GCP console / AppEngine / taskqueues </a></html>'.format(
            e, 'http://console.cloud.google.com/appengine/taskqueues?tab=CRON')
        logging.info('Rejected non-Cron request')

    return status
Example No. 30
def download_blob(source_blob_name,
                  destination_file_name,
                  bucket_name="project_vaxx"):
    bucket_t = storage_client.bucket(bucket_name, user_project=None)
    temp = storage.Blob(source_blob_name, bucket_t)

    temp.download_to_filename(destination_file_name)