def testListBucket(self):
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
        self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET + '/test/')
    self.assertEqual(filenames, [stat.filename for stat in bucket])

    bucket = cloudstorage.listbucket(BUCKET + '/test/', max_keys=1)
    stats = list(bucket)
    self.assertEqual(1, len(stats))
    stat = stats[0]
    content = ''.join(DEFAULT_CONTENT)
    self.assertEqual(filenames[0], stat.filename)
    self.assertEqual(len(content), stat.st_size)
    self.assertEqual(hashlib.md5(content).hexdigest(), stat.etag)

    bucket = cloudstorage.listbucket(BUCKET + '/test/',
                                     marker=BUCKET + '/test/foo0',
                                     max_keys=1)
    stats = [stat for stat in bucket]
    self.assertEqual(1, len(stats))
    stat = stats[0]
    self.assertEqual(foos[1], stat.filename)
def delete_files_in_folder(self, folder, skip=[]):
    bucketFolder = self.getPath(folder)
    log.info("bucketFolder %s" % bucketFolder)
    startdel = datetime.datetime.now()
    delcount = 0
    page_size = 100
    stats = cloudstorage.listbucket(bucketFolder, max_keys=page_size)
    files = []
    while True:
        count = 0
        for stat in stats:
            count += 1
            fname = stat.filename
            delete = True
            if skip:
                for s in skip:
                    if s in fname:
                        delete = False
                        log.info("SKIPPING: %s" % fname)
                        break
            if delete:
                files.append(stat.filename)
        for f in files:
            delcount += 1
            self._delete_file(f)
        if count != page_size or count == 0:
            break
        stats = cloudstorage.listbucket(bucketFolder, max_keys=page_size,
                                        marker=stat.filename)
        files = []
    log.info("Cloudstorage: deleted %s files in %s seconds" %
             (delcount, (datetime.datetime.now() - startdel)))
    return delcount
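A hypothetical usage sketch for the helper above; the `storage` instance and the folder/skip names are invented for illustration:

# Delete everything under 'exports/' except objects whose names contain
# 'manifest'; the method returns the number of objects removed.
deleted = storage.delete_files_in_folder('exports/', skip=['manifest'])
log.info("removed %s objects" % deleted)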
def list_bucket(self, bucket):
    """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
        bucket: bucket.
    """
    self.response.write('Creating more files for listbucket...\n')
    self.create_file(bucket + '/foo1')
    self.create_file(bucket + '/foo2')

    self.response.write('\nListbucket result:\n')

    page_size = 1
    stats = gcs.listbucket(bucket, max_keys=page_size)
    while True:
        count = 0
        for stat in stats:
            count += 1
            self.response.write(repr(stat))
            self.response.write('\n')

        if count != page_size or count == 0:
            break
        last_filename = stat.filename[len(bucket) + 1:]
        stats = gcs.listbucket(bucket, max_keys=page_size,
                               marker=last_filename)
def testListBucketWithDelimiter(self):
    filenames = ['/bar',
                 '/foo0', '/foo1',
                 '/foo/a', '/foo/b/bb', '/foo/b/bbb', '/foo/c/c',
                 '/foo1/a',
                 '/foo2/a', '/foo2/b',
                 '/foo3/a']

    def FullyQualify(n):
        return BUCKET + n
    fullnames = [FullyQualify(n) for n in filenames]
    for n in fullnames:
        self.CreateFile(n)

    bucket = cloudstorage.listbucket(BUCKET + '/foo', delimiter='/',
                                     max_keys=5)
    expected = [FullyQualify(n) for n in
                ['/foo/', '/foo0', '/foo1', '/foo1/', '/foo2/']]
    self.assertEqual(expected, [stat.filename for stat in bucket])

    bucket = cloudstorage.listbucket(BUCKET + '/foo/', delimiter='/',
                                     max_keys=2)
    expected = [FullyQualify(n) for n in ['/foo/a', '/foo/b/']]
    self.assertEqual(expected, [stat.filename for stat in bucket])
def list_bucket(self, bucket):
    """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
        bucket: bucket.
    """
    self.response.write('Listbucket result:\n')

    page_size = 1
    stats = gcs.listbucket(bucket + '/foo', max_keys=page_size)
    while True:
        count = 0
        for stat in stats:
            count += 1
            self.response.write(repr(stat))
            self.response.write('\n')

        if count != page_size or count == 0:
            break
        # pylint: disable=undefined-loop-variable
        stats = gcs.listbucket(bucket + '/foo', max_keys=page_size,
                               marker=stat.filename)
def list_bucket_directory_mode(self, bucket):
    print 'Listbucket directory mode result'
    for stat in gcs.listbucket(bucket + '/b', delimiter='/'):
        print stat
        if stat.is_dir:
            for subdir_file in gcs.listbucket(stat.filename, delimiter='/'):
                print subdir_file
def list_bucket_directory_mode(self, bucket):
    self.response.write('Listbucket directory mode result:\n')
    for stat in gcs.listbucket(bucket + '/b', delimiter='/'):
        self.response.write('%r' % stat)
        self.response.write('\n')
        if stat.is_dir:
            for subdir_file in gcs.listbucket(stat.filename, delimiter='/'):
                self.response.write(' %r' % subdir_file)
                self.response.write('\n')
def list_bucket_directory_mode(self, bucket):
    self.response.write('Listbucket directory mode result:\n')
    for stat in cloudstorage.listbucket(bucket + '/b', delimiter='/'):
        self.response.write(stat)
        self.response.write('\n')
        if stat.is_dir:
            for subdir_file in cloudstorage.listbucket(
                    stat.filename, delimiter='/'):
                self.response.write(' {}'.format(subdir_file))
                self.response.write('\n')
def testRemoveGarbage(self):
    """Make sure abandoned files get removed."""
    writer_spec = {self.WRITER_CLS.BUCKET_NAME_PARAM: "unused",
                   self.WRITER_CLS.TMP_BUCKET_NAME_PARAM: "test"}
    mapreduce_state = self.create_mapreduce_state(output_params=writer_spec)
    shard_state = self.create_shard_state(1)
    ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                    shard_state.shard_number, 0)
    writer.begin_slice(None)

    # our shard
    our_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-1-very-random"
    f = cloudstorage.open(our_file, "w")
    f.write("foo?")
    f.close()

    # not our shard
    their_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-3-very-random"
    f = cloudstorage.open(their_file, "w")
    f.write("bar?")
    f.close()

    # unrelated file
    real_file = "/test/this_things_should_survive"
    f = cloudstorage.open(real_file, "w")
    f.write("yes, foobar!")
    f.close()

    # Make sure bogus file still exists
    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertTrue(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # slice end should clean up the garbage
    writer = self._serialize_and_deserialize(writer)

    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # finalize shouldn't change anything
    writer.finalize(ctx, shard_state)
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)
def list_bucket(self, bucket):
    page_size = 1
    stats = gcs.listbucket(bucket, max_keys=page_size)
    while True:
        count = 0
        for stat in stats:
            count += 1
            self.response.write(repr(stat))
            self.response.write('\n')

        if count != page_size or count == 0:
            break
        stats = gcs.listbucket(bucket, max_keys=page_size,
                               marker=stat.filename)
def get_filenames():
    page_size = 1
    stats = gcs.listbucket(bucket, max_keys=page_size)
    names = []
    while True:
        count = 0
        for stat in stats:
            count += 1
            names.append(repr(stat.filename)[1:-1])

        if count != page_size or count == 0:
            break
        stats = gcs.listbucket(bucket, max_keys=page_size,
                               marker=stat.filename)
    return names
def get(self): """Regelmäßig von Cron aufzurufen.""" if not gaetkconfig.BIGQUERY_PROJECT: return self.return_text('BIGQUERY_PROJECT not provided, exiting') if not gaetkconfig.BACKUP_BUCKET: bucket = get_default_gcs_bucket_name() else: bucket = gaetkconfig.BACKUP_BUCKET bucketpath = '/'.join((bucket, get_application_id())) bucketpath = '/{}/'.format(bucketpath.strip('/')) logger.info('searching backups in %r', bucketpath) objs = cloudstorage.listbucket(bucketpath, delimiter=b'/') subdirs = sorted((obj.filename for obj in objs if obj.is_dir), reverse=True) # Find Path of newest available backup # typical path: # '/appengine-backups-eu-nearline/hudoraexpress/2017-05-02/ag9...EM.ArtikelBild.backup_info' dirs = {} for subdir in subdirs: try: datum = convert_to_date(subdir.rstrip('/').split('/')[-1]) except ValueError: continue else: dirs[datum] = subdir if not dirs: raise RuntimeError('No Datastore Backup found in %r' % (bucketpath)) datum = max(dirs) if datum < datetime.date.today() - datetime.timedelta(days=7): raise exc.RuntimeError( 'Latest Datastore Backup in {!r} is way too old!'.format( bucketpath)) countdown = 1 subdir = dirs[datum] logger.info('Uploading Backup %s from directory %s', datum, subdir) regexp = re.compile(subdir + r'([\w-]+)\.(\w+)\.backup_info') for obj in cloudstorage.listbucket(subdir): if regexp.match(obj.filename): defer(upload_backup_file, obj.filename, _countdown=countdown) countdown += 2 self.response.write('ok, countdown=%d\n' % (countdown))
def get_latest_data_location(device):
    # type: (Device) -> str
    """
    Gets the url path to the latest data entry from a given device

    :param device: device datastore object
    :return: string
    """
    serial = device.serial_num
    base_path = BUCKET_PREFIX.format(serial)
    filenames = []
    # This is probably wrong too. Damnit.
    for statinfo in gcs.listbucket(base_path, retry_params=READ_RETRY):
        filenames.append(statinfo.filename)
    split_filenames = [os.path.splitext(f) for f in filenames]
    with_index = [(int(os.path.split(f[0])[1]), f) for f in split_filenames]
    sorted_filenames = sorted(with_index, key=lambda e: e[0], reverse=True)
    try:
        index, split_filename = sorted_filenames[0]
    except IndexError:
        raise RuntimeError("There are no files in the storage.")
    filename = split_filename[0] + split_filename[1]
    return filename
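Given the author's own doubt in the comment above, a minimal alternative sketch that picks the newest object by its GCS creation timestamp (st_ctime on GCSFileStat) instead of parsing an index out of the filename; BUCKET_PREFIX and READ_RETRY are assumed from the snippet above, and the function name is illustrative:

def get_latest_data_location_by_ctime(device):
    # Hypothetical variant: relies on GCSFileStat.st_ctime rather than
    # filename parsing.
    base_path = BUCKET_PREFIX.format(device.serial_num)
    stats = list(gcs.listbucket(base_path, retry_params=READ_RETRY))
    if not stats:
        raise RuntimeError("There are no files in the storage.")
    return max(stats, key=lambda s: s.st_ctime).filename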
def FileExists(self, bucketname, filename):
    stats = cloudstorage.listbucket(bucketname)
    exist = False
    for stat in stats:
        if bucketname + '/' + filename in stat.filename:
            exist = True
            break  # no need to scan the rest of the bucket
    return exist
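Listing an entire bucket just to test one object is expensive; a sketch of a cheaper direct check, assuming the standard GoogleAppEngineCloudStorageClient API, where cloudstorage.stat raises NotFoundError for missing objects (function name illustrative):

def file_exists(bucketname, filename):
    # Fetch metadata for the single object instead of scanning the bucket.
    try:
        cloudstorage.stat(bucketname + '/' + filename)
        return True
    except cloudstorage.NotFoundError:
        return False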
def getFiles():
    stats = gcs.listbucket(FileManager.getBucketName() + '/')
    files = []
    for stat in stats:
        items = stat.filename.split('/')
        files.append(items[-1])
    return files
def get(self):
    bucket_name = 'virtualproctor'
    self.response.headers['Content-Type'] = 'application/json'
    bucket = '/' + bucket_name
    try:
        stats = gcs.listbucket(bucket)
        ten_minutes = datetime.timedelta(minutes=10)
        ten_minutes_ago = datetime.datetime.now() - ten_minutes
        names = []
        for x in stats:
            create_time = datetime.datetime.fromtimestamp(x.st_ctime)
            if ten_minutes_ago < create_time:
                name = x.filename.replace('/virtualproctor/', '').split('.')
                image = {
                    'classroom': name[0],
                    'student': name[1],
                    'created': x.st_ctime
                }
                names.append(image)
        self.response.write(json.dumps(names))
    except Exception as e:
        logging.exception(e)
        self.response.write('{"error": "oops"}')
def get_files(self):
    """Get files from GCS"""
    files = []
    stats = gcs.listbucket('/%s/backups' % BACKUP_BUCKET, max_keys=100)
    for stat in stats:
        files.append(stat)
    return files
def sync_table_only_ids(self):
    del self.table_only_ids[:]  # table = [] would break the references!
    stats = gcs.listbucket('%s/%s' % (bucket, self._path))
    for stat in stats:
        id = stat.filename
        self.table_only_ids.append({'id': id})
    return self.table_only_ids
def hello():
    from google.appengine.api import app_identity
    import cloudstorage as gcs

    # Get default bucket name
    default_bucket_name = app_identity.get_default_gcs_bucket_name()
    out = "Default bucket : " + default_bucket_name

    # Write to cloud
    filename = '/' + default_bucket_name + '/new.txt'
    gcs_file = gcs.open(filename, 'w', content_type='text/plain')
    gcs_file.write('abcde\n')
    gcs_file.write('Hello!' + '\n')
    gcs_file.close()

    # Read from cloud
    gcs_file = gcs.open(filename)
    contents = gcs_file.read()
    gcs_file.close()
    out += "<p>Contents :</p>"
    out += "<p>" + contents + "</p>"

    # Files CANNOT be appended, as the objects are immutable. To append,
    # you have to read, modify the contents, and overwrite.

    # List items
    blist = gcs.listbucket('/' + default_bucket_name)
    print(list(blist))
    return out
def _next_file(self):
    """Find next filename.

    self._filenames may need to be expanded via listbucket.

    Returns:
        None if no more file is left. Filename otherwise.
    """
    while True:
        if self._bucket_iter:
            try:
                return self._bucket_iter.next().filename
            except StopIteration:
                self._bucket_iter = None
                self._bucket = None
        if self._index >= len(self._filenames):
            return
        filename = self._filenames[self._index]
        self._index += 1
        if self._delimiter is None or not filename.endswith(self._delimiter):
            return filename
        self._bucket = cloudstorage.listbucket(filename,
                                               delimiter=self._delimiter)
        self._bucket_iter = iter(self._bucket)
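The directory-expansion idiom in _next_file above can be hard to see through the iterator state; a minimal standalone sketch of the same idea, assuming only the standard cloudstorage client (function name illustrative):

def iter_expanded_filenames(filenames, delimiter='/'):
    # Names ending in the delimiter are treated as directories and expanded
    # lazily via listbucket; plain names are yielded as-is.
    for name in filenames:
        if delimiter and name.endswith(delimiter):
            for stat in cloudstorage.listbucket(name, delimiter=delimiter):
                yield stat.filename
        else:
            yield name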
def get(self):
    bucket_name = os.environ.get('BUCKET_NAME',
                                 app_identity.get_default_gcs_bucket_name())

    self.response.headers['Content-Type'] = 'text/plain'
    self.response.write('Demo GCS Application running from Version: ' +
                        os.environ['CURRENT_VERSION_ID'] + '\n')
    self.response.write('Using bucket name: ' + bucket_name + '\n\n')
    self.response.write('Listbucket results:\n')

    bucket = '/' + bucket_name
    page_size = 100
    stats = gcs.listbucket(bucket + '/', max_keys=page_size)
    self.response.write('Stats for #%s entries\n' % stats)
    while True:
        count = 0
        for stat in stats:
            count += 1
            self.response.write('Item #%d' % count)
            self.response.write('\n')
            self.response.write(repr(stat))
            self.response.write('\n')

        self.response.write('Count = %d\n' % count)
        if count != page_size or count == 0:
            self.response.write('Breaking = %d\n' % count)
            break
        # Continue from the last filename seen; without this re-fetch the
        # loop would stop after the first page.
        stats = gcs.listbucket(bucket + '/', max_keys=page_size,
                               marker=stat.filename)
def list_bucket(collective):
    """Create several files and paginate through them.

    Production apps should set page_size to a practical value.

    Args:
        collective: subfolder of the bucket to list.
    """
    bucket = '/ahtme-music/' + collective
    page_size = 10
    stats = gcs.listbucket(bucket, max_keys=page_size)
    files = []
    while True:
        count = 0
        for stat in stats:
            count += 1
            stat.filename = stat.filename.split('/')[-1]
            files.append(stat)
            # print stat
            # self.response.write(repr(stat))
            # self.response.write('\n')

        if count != page_size or count == 0:
            break
        # stats = gcs.listbucket(bucket, max_keys=page_size,
        #                        marker=stat.filename)
    return files
def listdir(self, dir_name):
    """Lists all files in a directory.

    Args:
        dir_name: str. The directory whose files should be listed. This
            should not start with '/' or end with '/'.

    Returns:
        list(str). A lexicographically-sorted list of filenames.
    """
    if dir_name.endswith('/') or dir_name.startswith('/'):
        raise IOError(
            'The dir_name should not start with / or end with / : %s'
            % (dir_name))

    # The trailing slash is necessary to prevent non-identical directory
    # names with the same prefix from matching, e.g. /abcd/123.png should
    # not match a query for files under /abc/.
    prefix = '%s' % utils.vfs_construct_path(
        '/', self._assets_path, dir_name)
    if not prefix.endswith('/'):
        prefix += '/'
    # The prefix now ends and starts with '/'.
    bucket_name = app_identity_services.get_gcs_resource_bucket_name()
    # The path entered should be of the form, /bucket_name/prefix.
    path = '/%s%s' % (bucket_name, prefix)
    stats = cloudstorage.listbucket(path)
    files_in_dir = []
    for stat in stats:
        files_in_dir.append(stat.filename)
    return files_in_dir
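To make the trailing-slash comment above concrete, a small hypothetical illustration (bucket and object names invented):

# Without the trailing slash, the prefix also matches sibling directories.
stats = cloudstorage.listbucket('/my-bucket/abc')   # matches /abc/1.png and /abcd/123.png
stats = cloudstorage.listbucket('/my-bucket/abc/')  # matches only objects under /abc/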
def get(self):
    try:
        ch = 0
        cacheFlag = self.request.get("cacheFlag")
        print("cache flag received", cacheFlag)
        # cacheFlag = False; fetch list from GCS
        if str(cacheFlag) == "false":
            stats = gcs.listbucket(bucket_name)
            print("accessing gcs bucket")
            count = 0
            for stat in stats:
                self.response.write(stat.filename + ";")
                count += 1
            # count is an int; convert before concatenating
            print("total no of files in GCS: " + str(count))
            if count == 0:
                self.response.write("No data found :404")
        # cacheFlag = True; fetch list from Memcache
        else:
            print("in else part")
            filecontent = memcache.get('Master')
            print(filecontent)
            if len(memcache.get('Master')) > 0:
                print("fetching memcache list")
                for files in filecontent:
                    filedata = memcache.get(files)
                    if filedata is not None:
                        self.response.write(files + ";")
            else:
                count = -1
        count = None
        stats = None
    except Exception as e:
        print type(e)
        print "server exception"
def GetAllPaths(self, prefix, max_keys=None, marker=None, delimiter=None):
    return (f.filename[len(self.bucket) + 1:]
            for f in cloudstorage.listbucket(self.bucket, prefix=prefix,
                                             max_keys=max_keys, marker=marker,
                                             delimiter=delimiter))
def get(self):
    u = self.user_info
    username = u['name']
    # storage params
    bucketlist = gcs.listbucket(Rbucket)
    params = {'username': username}
    rbac(self, 'gcs', params)
def get(self):
    # login
    usr = users.get_current_user()
    if not usr:
        url = users.create_login_url(self.request.uri)
        url_linktext = 'Login'
        self.redirect(users.create_login_url(self.request.uri))
    else:
        url = users.create_logout_url(self.request.uri)
        url_linktext = 'Logout'

    # testing users deb model
    userlist = User.query().fetch(5)

    # get files from bucket for the user
    bucket_name = "/" + os.environ.get(
        'BUCKET_NAME', app_identity.get_default_gcs_bucket_name()) + "/" + str(usr)
    l_files = gcs.listbucket(bucket_name)

    # get shared files of the user
    sh_files = SharedFile.query(SharedFile.recipients == usr.email())
    result = sh_files.fetch(1000)

    template_values = {
        'url': url,
        'url_linktext': url_linktext,
        'user_name': usr,
        'files': l_files,
        'users': userlist,
        'shared_files': sh_files,
    }
    template = JINJA_ENVIRONMENT.get_template('index.html')
    self.response.write(template.render(template_values))
def search_file_in_bucket(bucket_name, folder_name, file_name):
    """Search through all files within folder_name."""
    # The marker must refer to the bucket actually being listed, not a
    # hardcoded bucket name, or the listing starts from the wrong place.
    buck = cloudstorage.listbucket(
        "/" + bucket_name + "/" + folder_name,
        marker='/' + bucket_name + '/' + folder_name + '/')
    for blob in buck:
        if blob.filename == file_name:
            return True
    return False
def exists_attachments_for_entity_key(entity_key):
    bucket_name = os.environ.get('BUCKET_NAME',
                                 app_identity.get_default_gcs_bucket_name())
    files = gcs.listbucket("/" + bucket_name + "/" + entity_key)
    # Returning from inside the loop means "at least one object exists".
    for file in files:
        return True
    return False
def listdir(self, name):
    """
    TODO collect directories
    """
    return [], [
        obj.filename for obj in cloudstorage.listbucket(self.path(name))
    ]
def list_gcs_file_names(cls, bucket=None, folder='/'):
    """
    Example usage:
        for gcs_filename, filename in BlobFiles.list_gcs_file_names(folder='/upload')
    """
    for obj in gcs.listbucket('/%s%s' % (
            bucket or app_identity.get_default_gcs_bucket_name(), folder)):
        pbf = cls._query(cls.gcs_filename == obj.filename).get(
            projection=cls.filename)
        # yield result: the gcs_filename from GCS and the corresponding
        # filename from BlobFiles
        yield obj.filename, (pbf.filename if pbf else '')
def get(self, backup_date):
    # Make sure the requested backup exists
    backup_bucket = self.get_backup_bucket()
    backup_dir = "/{}/{}/".format(backup_bucket, backup_date)
    backup_files = cloudstorage.listbucket(backup_dir)
    bucket_prefix = "/{}/".format(backup_bucket)

    count = 0
    for bfile in backup_files:
        if bfile.is_dir:
            continue
        count += 1
        fname = bfile.filename
        path = fname[len(bucket_prefix):]
        taskqueue.add(
            url='/backend-tasks/backup/archive/file',
            params={
                'bucket': backup_bucket,
                'object': path,
            },
            queue_name='backups',
            method='POST')
    self.response.out.write("Enqueued updates for {} files".format(count))
def post(self):
    # get args
    self.start_cursor = self.request.get('cursor')
    self.filtering_event_key = self.request.get('event')
    self.filename = self.request.get('filename')
    self.csv_header = self.request.get('csv_header')
    self.worker_url = self.request.get('worker_url')
    self.event = Event.get(self.filtering_event_key) if self.filtering_event_key else None

    # get (base) query, skip query to cursor, filter for sites
    query = self.get_base_query()
    if self.start_cursor:
        query.with_cursor(self.start_cursor)
    fetched_sites = query.fetch(limit=self.sites_per_task)
    sites = self.filter_sites(fetched_sites)

    # write part of csv file to GCS
    csv_part_gcs_fd = cloudstorage.open(
        BUCKET_NAME + '/' + self.filename + '.part.' + self.start_cursor,
        'w',
        content_type='text/csv'
    )
    self._write_csv_rows(csv_part_gcs_fd, sites)
    csv_part_gcs_fd.close()

    # decide what to do next
    self.end_cursor = query.cursor()
    if self.end_cursor and self.start_cursor != self.end_cursor:
        # chain to next task
        taskqueue.add(
            url=self.worker_url,
            params=self.get_continuation_param_dict(),
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
        )
    else:
        # finish file: combine parts and deduplicate lines
        logging.info(u"Deduplicating to create %s ..." % self.filename)
        sio = StringIO()
        path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
        for gcs_file_stat in cloudstorage.listbucket(path_prefix):
            csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
            for line in csv_part_gcs_fd:
                sio.write(line)
            csv_part_gcs_fd.close()
        sio.seek(0)
        deduplicated_lines = set(line for line in sio)

        # write csv header and deduplicated lines to new file
        csv_complete_gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + self.filename,
            'w',
            content_type='text/csv'
        )
        csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
        for line in deduplicated_lines:
            csv_complete_gcs_fd.write(line)
        csv_complete_gcs_fd.close()
def GetCachedResults2(job):
    filename = _GetCloudStorageName(job.job_id)
    results = cloudstorage.listbucket(filename)
    # If the listing yields anything at all, the cached object exists.
    for _ in results:
        return 'https://storage.cloud.google.com' + filename
    return None
def list_bucket(self, bucket):
    self.response.write('Listbucket result:\n')

    page_size = 1
    stats = gcs.listbucket(bucket + '/foo', max_keys=page_size)
    while True:
        count = 0
        for stat in stats:
            count += 1
            self.response.write(repr(stat))
            self.response.write('\n')

        if count != page_size or count == 0:
            break
        stats = gcs.listbucket(bucket + '/foo', max_keys=page_size,
                               marker=stat.filename)
def testListBucketPickle(self):
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
        self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET + '/test/')
    self.AssertListBucketEqual(filenames, bucket)

    bucket = cloudstorage.listbucket(BUCKET + '/test/', max_keys=2)
    self.AssertListBucketEqual(bars[:2], bucket)

    bucket = cloudstorage.listbucket(BUCKET + '/test/',
                                     marker=BUCKET + '/test/bar2',
                                     max_keys=2)
    self.AssertListBucketEqual(foos[:2], bucket)
def sizes(self):
    """ Returns a list of all available sizes of this photo. """
    inodes = cloudstorage.listbucket(
        path_prefix="/{}/{}".format(self.GCS_BUCKET, self.path))
    return [inode.filename.split("-")[-1] for inode in inodes]
def delete_file():
    # GCS helpfully decodes UTF-8 for you, which is a bit weird because it
    # won't accept unicode when creating an object.
    for stat in cloudstorage.listbucket(folder):
        target = stat.filename.encode('utf-8')
        cloudstorage.delete(target)
    return bottle.redirect('/list')
def testMapReduce(self):
    # Prepare test data
    bucket_name = "testbucket"
    job_name = "test_job"
    entity_count = 200

    for i in range(entity_count):
        TestEntity(data=str(i)).put()
        TestEntity(data=str(i)).put()

    # Run Mapreduce
    p = mapreduce_pipeline.MapreducePipeline(
        job_name,
        __name__ + ".test_mapreduce_map",
        __name__ + ".test_mapreduce_reduce",
        input_reader_spec=input_readers.__name__ + ".DatastoreInputReader",
        output_writer_spec=(
            output_writers.__name__ + "._GoogleCloudStorageRecordOutputWriter"),
        mapper_params={
            "entity_kind": __name__ + "." + TestEntity.__name__,
            "bucket_name": bucket_name
        },
        reducer_params={
            "output_writer": {
                "bucket_name": bucket_name
            },
        },
        shards=16)
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    self.assertEquals(1, len(self.emails))
    self.assertTrue(self.emails[0][1].startswith("Pipeline successful:"))

    # Verify reduce output.
    p = mapreduce_pipeline.MapreducePipeline.from_id(p.pipeline_id)
    self.assertEqual(model.MapreduceState.RESULT_SUCCESS,
                     p.outputs.result_status.value)
    output_data = []
    for output_file in p.outputs.default.value:
        with cloudstorage.open(output_file) as f:
            for record in records.RecordsReader(f):
                output_data.append(record)

    expected_data = [str((str(d), ["", ""])) for d in range(entity_count)]
    expected_data.sort()
    output_data.sort()
    self.assertEquals(expected_data, output_data)

    # Verify that mapreduce doesn't leave intermediate files behind.
    temp_file_stats = cloudstorage.listbucket("/" + bucket_name)
    for stat in temp_file_stats:
        if stat.filename:
            self.assertFalse(
                stat.filename.startswith("/%s/%s-shuffle-" %
                                         (bucket_name, job_name)))
def listdir(filename, recursive=True):
    bucket, prefix = filename[1:].split('/', 1)
    bucket = '/' + bucket
    names = set()
    for item in cloudstorage.listbucket(bucket, prefix=prefix):
        name = item.filename[len(bucket) + len(prefix) + 1:]
        if name and (recursive or '/' not in name):
            names.add(name)
    return list(names)
def rmtree(self, path):
    if path != "":
        path_prefix = self.location + "/" + path + "/"
    else:
        path_prefix = self.location + "/"
    bucketContents = gcs.listbucket(path_prefix=path_prefix)
    for entry in bucketContents:
        gcs.delete(entry.filename)
def __deleteBlob(cls, key):
    """
    Private method to delete a blobstore file from key
    param @key is String
    """
    r = gcs.listbucket(cls.__bucket_name)
    for a in r:
        if a.filename[len(cls.__bucket_name):] == key:
            gcs.delete(a.filename)
            break
def _runTest(self, num_shards):
    entity_count = 1000
    bucket_name = "bucket"
    tmp_bucket_name = "tmp_bucket"
    job_name = "test_map"

    for _ in range(entity_count):
        TestEntity().put()

    mapreduce_id = control.start_map(
        job_name,
        __name__ + ".test_handler_yield_key_str",
        DATASTORE_READER_NAME,
        {
            "entity_kind": __name__ + "." + TestEntity.__name__,
            "output_writer": {
                "bucket_name": bucket_name,
                "tmp_bucket_name": tmp_bucket_name,
            },
        },
        shard_count=num_shards,
        output_writer_spec=self.WRITER_NAME)

    test_support.execute_until_empty(self.taskqueue)

    mapreduce_state = model.MapreduceState.get_by_job_id(mapreduce_id)
    filenames = self.WRITER_CLS.get_filenames(mapreduce_state)

    self.assertEqual(num_shards, len(set(filenames)))
    total_entries = 0
    for shard in range(num_shards):
        self.assertTrue(filenames[shard].startswith(
            "/%s/%s" % (bucket_name, job_name)))
        data = cloudstorage.open(filenames[shard]).read()
        # strip() is used to remove the last newline of each file so that
        # split() does not return extraneous empty entries.
        total_entries += len(data.strip().split("\n"))
    self.assertEqual(entity_count, total_entries)

    # no files left in tmpbucket
    self.assertFalse(list(cloudstorage.listbucket("/%s" % tmp_bucket_name)))
    # and only expected files in regular bucket
    files_in_bucket = [
        f.filename for f in cloudstorage.listbucket("/%s" % bucket_name)]
    self.assertEquals(filenames, files_in_bucket)
def testListBucketCompatibility(self):
    """Test listbucket's old interface still works."""
    bars = [BUCKET + '/test/bar' + str(i) for i in range(3)]
    foos = [BUCKET + '/test/foo' + str(i) for i in range(3)]
    filenames = bars + foos
    for filename in filenames:
        self.CreateFile(filename)

    bucket = cloudstorage.listbucket(BUCKET, prefix='test/',
                                     marker='test/foo')
    self.assertEqual(foos, [stat.filename for stat in bucket])
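For contrast, the two listbucket calling conventions exercised by these tests, side by side (paths illustrative):

# Old interface: bucket plus bucket-relative prefix/marker.
stats = cloudstorage.listbucket('/my-bucket', prefix='test/', marker='test/foo')
# New interface: one fully-qualified path prefix, marker as a full path.
stats = cloudstorage.listbucket('/my-bucket/test/', marker='/my-bucket/test/foo0')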
def _get_files():
    files = []
    for f in cloudstorage.listbucket(OLD_BUCKET):
        split = f.filename.split('/protocol-logs-')[1].split('.')
        date_str = split[0]
        date = datetime.strptime(date_str, '%Y%m%d')
        if split[-1] == 'processed' or date < datetime(2016, 3, 17):
            continue
        files.append(f.filename)
    return files
def list_bucket(self, bucket): """Create several files and paginate through them.""" self.response.write('Listbucket result:\n') # Production apps should set page_size to a practical value. page_size = 1 stats = cloudstorage.listbucket(bucket + '/foo', max_keys=page_size) while True: count = 0 for stat in stats: count += 1 self.response.write(repr(stat)) self.response.write('\n') if count != page_size or count == 0: break stats = cloudstorage.listbucket( bucket + '/foo', max_keys=page_size, marker=stat.filename)
def list_files():
    list_retry_params = gcs.RetryParams(initial_delay=.25,
                                        max_retries=0,
                                        urlfetch_timeout=.25)
    files = set()
    try:
        for file in gcs.listbucket("/{}/".format(BUCKET_NAME),
                                   retry_params=list_retry_params):
            files.add(file.filename)
    except gcs.TimeoutError:
        pass
    finally:
        return files
def listing():
    listbucket = []
    bucketContent = gcs.listbucket(bucket, marker=None, max_keys=None,
                                   delimiter=None, retry_params=None)
    for entry in bucketContent:
        listbucket.append(entry.filename)
    return listbucket