def run(self, job_name, input_files):
    """Start a shuffle of input_files, or complete at once if all are empty.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle; one output file is created
        for each entry.
    """
    # No input with a non-zero size means there is nothing to hand over.
    has_data = any(files.stat(name).st_size > 0 for name in input_files)
    if not has_data:
        self.complete([])
        return

    output_files = [
        files.blobstore.create(
            _blobinfo_uploaded_filename=(
                job_name + "-shuffle-output-" + str(shard)))
        for shard in range(len(input_files))
    ]
    self.fill(self.outputs._output_files, output_files)

    # The job id is made distinct per run by appending the current time.
    shuffle_id = "%s-%s" % (job_name, int(time.time()))
    callback = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": os.environ["CURRENT_VERSION_ID"],
    }
    files.shuffler.shuffle(shuffle_id, input_files, output_files, callback)
def run(self, job_name, input_files):
    """Start a shuffle of input_files, or complete at once if all are empty.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle; one output file is created
        for each entry.
    """
    # Stop at the first input that has any bytes; if none does, finish now.
    for name in input_files:
        if files.stat(name).st_size > 0:
            break
    else:
        self.complete([])
        return

    output_files = [
        files.blobstore.create(
            _blobinfo_uploaded_filename=(
                job_name + "-shuffle-output-" + str(shard)))
        for shard in range(len(input_files))
    ]
    self.fill(self.outputs._output_files, output_files)

    # Callback target: the bare version name on the default module,
    # otherwise "<version>.<module>." (trailing dot included).
    version_name = modules.get_current_version_name()
    module_name = modules.get_current_module_name()
    if module_name == "default":
        target = version_name
    else:
        target = "%s.%s." % (version_name, module_name)

    shuffle_id = "%s-%s" % (job_name, int(time.time()))
    callback = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": target,
    }
    files.shuffler.shuffle(shuffle_id, input_files, output_files, callback)
def run(self, job_name, input_files):
    """Shuffle input_files into freshly created blobstore output files.

    When no input file has a non-zero size the pipeline completes immediately
    with an empty list and no shuffle is requested.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle.
    """
    for candidate in input_files:
        if files.stat(candidate).st_size > 0:
            break
    else:
        # Loop ran to the end without finding data.
        self.complete([])
        return

    shard_indexes = range(len(input_files))
    output_files = []
    for shard in shard_indexes:
        blob_name = job_name + "-shuffle-output-" + str(shard)
        output_files.append(
            files.blobstore.create(_blobinfo_uploaded_filename=blob_name))
    self.fill(self.outputs._output_files, output_files)

    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    callback_params = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": os.environ["CURRENT_VERSION_ID"],
    }
    files.shuffler.shuffle(
        shuffle_name, input_files, output_files, callback_params)
def testShuffleNoFile(self):
    """A shuffle started with no input files lists only zero-size outputs."""
    started = shuffler.ShufflePipeline("testjob", [])
    started.start()
    test_support.execute_until_empty(self.taskqueue)

    # Reload the pipeline by id to read its recorded outputs.
    finished = shuffler.ShufflePipeline.from_id(started.pipeline_id)
    output_names = finished.outputs.default.value
    for name in output_names:
        self.assertEqual(0, files.stat(name).st_size)
def testShuffleNoFile(self):
    """Run ShufflePipeline with an empty input list and check output sizes."""
    no_inputs = []
    pipeline = shuffler.ShufflePipeline("testjob", no_inputs)
    pipeline.start()
    test_support.execute_until_empty(self.taskqueue)

    reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
    # Every output file the pipeline reports must be empty.
    for output_name in reloaded.outputs.default.value:
        output_size = files.stat(output_name).st_size
        self.assertEqual(0, output_size)
def testShuffleNoData(self):
    """Shuffling one never-written blob three times lists only empty outputs."""
    # Create and finalize a blob without writing to it, then resolve the
    # file name derived from its blob key.
    writable_name = files.blobstore.create()
    files.finalize(writable_name)
    blob_key = files.blobstore.get_blob_key(writable_name)
    input_file = files.blobstore.get_file_name(blob_key)

    started = shuffler.ShufflePipeline("testjob", [input_file] * 3)
    started.start()
    test_support.execute_until_empty(self.taskqueue)

    finished = shuffler.ShufflePipeline.from_id(started.pipeline_id)
    for name in finished.outputs.default.value:
        self.assertEqual(0, files.stat(name).st_size)
def file_insert(key, value):
    """Write value to BUCKET/key and mirror small files into memcache.

    Args:
      key: name of the file under BUCKET; also used as the memcache key.
      value: content written to the file.
    """
    gcs_path = BUCKET + '/' + key
    writable = files.gs.create(
        gcs_path, mime_type='text/plain', acl='public-read')

    with files.open(writable, 'a') as handle:
        handle.write(value)

    # The file only becomes readable in Google Cloud Storage once finalized.
    files.finalize(writable)

    # Files under 100 KiB are also cached, if caching is switched on.
    stored_size = files.stat(gcs_path).st_size
    is_small = stored_size < 100 * 1024
    if is_small and MEMCACHED_ENABLED:
        memcache.set(key, value)
def testShuffleNoData(self):
    """Run ShufflePipeline over three copies of an unwritten blob's name."""
    created = files.blobstore.create()
    files.finalize(created)  # finalized without any write
    key = files.blobstore.get_blob_key(created)
    input_file = files.blobstore.get_file_name(key)
    inputs = [input_file, input_file, input_file]

    pipeline = shuffler.ShufflePipeline("testjob", inputs)
    pipeline.start()
    test_support.execute_until_empty(self.taskqueue)

    reloaded = shuffler.ShufflePipeline.from_id(pipeline.pipeline_id)
    # Every output file the pipeline reports must be empty.
    for output_name in reloaded.outputs.default.value:
        output_size = files.stat(output_name).st_size
        self.assertEqual(0, output_size)
def get(self):
    """Render index.html with the listing, its summed st_size and its count."""
    entries = listing()

    # One stat call per listed name, looked up under BUCKET.
    sizes = [files.stat(BUCKET + '/' + name).st_size for name in entries]

    context = {
        'listing': entries,
        'total_size': sum(sizes),
        'num_files': len(sizes),
    }
    page = JINJA_ENVIRONMENT.get_template('index.html')
    self.response.write(page.render(context))
def run(self, job_name, input_files):
    """Start a shuffle of input_files, or complete at once if all are empty.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle; one output file is created
        for each entry.
    """
    # The big shuffler cannot cope with empty input, so bail out early when
    # no input file has any content.
    has_data = any(files.stat(name).st_size > 0 for name in input_files)
    if not has_data:
        self.complete([])
        return

    output_files = [
        files.blobstore.create(
            _blobinfo_uploaded_filename=(
                job_name + "-shuffle-output-" + str(shard)))
        for shard in range(len(input_files))
    ]
    self.fill(self.outputs._output_files, output_files)

    # Route the shuffler callback to the current module and version, so that
    # non-default modules and non-default versions are supported.
    version_name = modules.get_current_version_name()
    module_name = modules.get_current_module_name()
    if module_name == "default":
        target = version_name
    else:
        # NOTE(user): Keep the trailing dot. Old versions of the shuffler
        # library put "myversion.12345678" in this field and expected the
        # admin-shuffler app to strip the timestamp suffix.
        target = "%s.%s." % (version_name, module_name)

    shuffle_id = "%s-%s" % (job_name, int(time.time()))
    callback = {
        "url": self.get_callback_url(),
        # NOTE(user): Always GET, because of the way the admin_shuffler app
        # adds the callback task with additional URL params.
        "method": "GET",
        "queue": self.queue_name,
        "version": target,
    }
    files.shuffler.shuffle(shuffle_id, input_files, output_files, callback)
def run(self, job_name, input_files):
    """Shuffle input_files into freshly created blobstore output files.

    When no input file has a non-zero size the pipeline completes immediately
    with an empty list and no shuffle is requested.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle.
    """
    # Big shuffler can not handle no input: return right away if every input
    # file is empty.
    for candidate in input_files:
        if files.stat(candidate).st_size > 0:
            break
    else:
        self.complete([])
        return

    output_files = []
    for shard in range(len(input_files)):
        blob_name = job_name + "-shuffle-output-" + str(shard)
        output_files.append(
            files.blobstore.create(_blobinfo_uploaded_filename=blob_name))
    self.fill(self.outputs._output_files, output_files)

    # Let shuffler callbacks reach a specific module and a specific
    # non-default version of that module.
    current_version = modules.get_current_version_name()
    current_module = modules.get_current_module_name()
    # NOTE(user): The final dot in the non-default form is required: old
    # versions of the shuffler library would put "myversion.12345678" in this
    # field, expecting the admin-shuffler app to remove the timestamp suffix.
    target = (current_version if current_module == "default"
              else "%s.%s." % (current_version, current_module))

    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    callback_params = {
        "url": self.get_callback_url(),
        # NOTE(user): The method is always GET because of how the
        # admin_shuffler app adds the callback task with extra URL params.
        "method": "GET",
        "queue": self.queue_name,
        "version": target,
    }
    files.shuffler.shuffle(
        shuffle_name, input_files, output_files, callback_params)
def run(self, job_name, input_files):
    """Request a shuffle of input_files unless none of them holds any data.

    Args:
      job_name: prefix for the output blob names and for the shuffle job id.
      input_files: names of the files to shuffle; one output file is created
        for each entry.
    """
    if not any(files.stat(path).st_size > 0 for path in input_files):
        # All inputs are zero-sized (or there are none): complete with no
        # output files.
        self.complete([])
        return

    output_files = []
    for index in range(len(input_files)):
        upload_name = job_name + "-shuffle-output-" + str(index)
        created = files.blobstore.create(
            _blobinfo_uploaded_filename=upload_name)
        output_files.append(created)
    self.fill(self.outputs._output_files, output_files)

    # On a non-default module the target becomes "<version>.<module>.".
    target = modules.get_current_version_name()
    current_module = modules.get_current_module_name()
    if current_module != "default":
        target = "%s.%s." % (target, current_module)

    callback_params = {
        "url": self.get_callback_url(),
        "method": "GET",
        "queue": self.queue_name,
        "version": target,
    }
    shuffle_name = "%s-%s" % (job_name, int(time.time()))
    files.shuffler.shuffle(
        shuffle_name, input_files, output_files, callback_params)