def split(self):
    """Split a RecordIORecords data into two even chunks.

    Records are consumed from both ends towards the middle: the next record
    always goes to whichever half currently has the smaller accumulated
    ``recordio_chunks.size``, so the two halves end up roughly balanced by
    size while the original record order is preserved.

    :return: lower_entries, higher_entries, middle_entry -- the two halves
             serialized with ``marshal.dumps`` and the first (unserialized)
             record of the higher half.
    :raises RecordIOTooSmallToSplitError: if there are fewer than two
             records.
    """
    if len(self) <= 1:
        raise RecordIOTooSmallToSplitError()
    lo_records = []
    # Filled back-to-front and reversed once afterwards; inserting at index
    # 0 for every record would make building this half quadratic.
    hi_reversed = []
    lo_size = 0
    hi_size = 0
    left = -1
    right = len(self)
    while left + 1 != right:
        if lo_size <= hi_size:
            left += 1
            record = self.records_[left]
            lo_records.append(record)
            lo_size += recordio_chunks.size(record)
        else:
            right -= 1
            record = self.records_[right]
            hi_reversed.append(record)
            hi_size += recordio_chunks.size(record)
    hi_records = hi_reversed[::-1]
    if not hi_records:
        # Only reachable when the leading records report a non-positive
        # size, so the lower half swallowed everything. Move the last record
        # over so both halves are non-empty and a middle entry exists.
        hi_records.append(lo_records.pop())
    middle_entry = hi_records[0]
    return (marshal.dumps(lo_records, MARSHAL_VERSION),
            marshal.dumps(hi_records, MARSHAL_VERSION),
            middle_entry)
def split(self):
    """Split a RecordIORecords data into two even chunks.

    Records are consumed from both ends towards the middle: the next record
    always goes to whichever half currently has the smaller accumulated
    ``recordio_chunks.size``, so the two halves end up roughly balanced by
    size while the original record order is preserved.

    :return: lower_entries, higher_entries, middle_entry -- the two halves
             serialized with ``marshal.dumps`` and the first (unserialized)
             record of the higher half.
    :raises RecordIOTooSmallToSplitError: if there are fewer than two
             records.
    """
    if len(self) <= 1:
        raise RecordIOTooSmallToSplitError()
    lo_records = []
    # Filled back-to-front and reversed once afterwards; inserting at index
    # 0 for every record would make building this half quadratic.
    hi_reversed = []
    lo_size = 0
    hi_size = 0
    left = -1
    right = len(self)
    while left + 1 != right:
        if lo_size <= hi_size:
            left += 1
            record = self.records_[left]
            lo_records.append(record)
            lo_size += recordio_chunks.size(record)
        else:
            right -= 1
            record = self.records_[right]
            hi_reversed.append(record)
            hi_size += recordio_chunks.size(record)
    hi_records = hi_reversed[::-1]
    if not hi_records:
        # Only reachable when the leading records report a non-positive
        # size, so the lower half swallowed everything. Move the last record
        # over so both halves are non-empty and a middle entry exists.
        hi_records.append(lo_records.pop())
    middle_entry = hi_records[0]
    return (marshal.dumps(lo_records, MARSHAL_VERSION),
            marshal.dumps(hi_records, MARSHAL_VERSION),
            middle_entry)
def get(self):
    """Handle a RecordIO write request.

    Without a ``taskqueue`` request parameter: leases pending tasks from the
    'recordio-queue' queue (first argument 0), adds one 'recordio-writer'
    task per distinct tag and reports the outcome for each tag in the
    response. If a full page of tasks was leased, a script that reloads the
    page after 5000 ms is appended.

    With a ``taskqueue`` request parameter: repeatedly leases the tasks
    carrying that tag, unmarshals their payloads and groups them into
    batches. The current batch is handed to ``commit_batch`` whenever adding
    the next payload would reach MAX_WRITE_BATCH_SIZE, and once more after
    the last lease.

    :raises Exception: if a ``commit_batch`` call returned a false value.
    """
    self.pull = taskqueue.Queue('recordio-queue')
    tag = self.request.get("taskqueue")
    max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE

    if not tag:
        # No tag requested: schedule one writer task per tag that has
        # pending work.
        leased = self.pull.lease_tasks(0, max_tasks_to_lease)
        scheduled_tags = set()
        for task in leased:
            task_tag = task.tag
            if task_tag not in scheduled_tags:
                scheduled_tags.add(task_tag)
                try:
                    writer_task = RecordIOWriter.create_task_(
                        task_tag, in_past=True)
                    taskqueue.Queue('recordio-writer').add(writer_task)
                    self.response.out.write(
                        "Scheduled write for: %s<br>" % task_tag)
                except (taskqueue.DuplicateTaskNameError,
                        taskqueue.TombstonedTaskError,
                        taskqueue.TaskAlreadyExistsError):
                    self.response.out.write(
                        "Already pending write for: %s<br>" % task_tag)
        if len(leased) == max_tasks_to_lease:
            # A full page came back, so there may be more tasks: have the
            # browser reload this page.
            self.response.out.write(
                "<script type=text/javascript>window.setTimeout(function() {"
                "document.location.reload();"
                "}, 5000);</script>")
        return

    pending = []
    pending_bytes = 0
    all_committed = True
    while True:
        leased = self.pull.lease_tasks_by_tag(
            LEASE_TIME_PER_BATCH, max_tasks_to_lease, tag=tag)
        for task in leased:
            if task.was_deleted:
                # Should never happen.
                continue
            entries = marshal.loads(task.payload)
            entries_bytes = sum(
                recordio_chunks.size(entry) for entry in entries)
            if entries_bytes + pending_bytes >= MAX_WRITE_BATCH_SIZE:
                # Once a commit has failed, later batches are not attempted.
                if all_committed:
                    all_committed = self.commit_batch(tag, pending)
                pending = [(task, entries)]
                pending_bytes = entries_bytes
            else:
                pending_bytes += entries_bytes
                pending.append((task, entries))
        # A short page means the queue holds no more tasks for this tag.
        if len(leased) != max_tasks_to_lease:
            break
    if all_committed:
        all_committed = self.commit_batch(tag, pending)
    if not all_committed:
        raise Exception("RecordIO not completed")
def get(self):
    """Handle a RecordIO write request.

    Without a ``taskqueue`` request parameter: leases pending tasks from the
    'recordio-queue' queue (first argument 0), adds one 'recordio-writer'
    task per distinct tag and reports the outcome for each tag in the
    response. If a full page of tasks was leased, a script that reloads the
    page after 5000 ms is appended.

    With a ``taskqueue`` request parameter: repeatedly leases the tasks
    carrying that tag, unmarshals their payloads and groups them into
    batches. The current batch is handed to ``commit_batch`` whenever adding
    the next payload would reach MAX_WRITE_BATCH_SIZE, and once more after
    the last lease.

    :raises Exception: if a ``commit_batch`` call returned a false value.
    """
    self.pull = taskqueue.Queue('recordio-queue')
    tag = self.request.get("taskqueue")
    max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE

    if not tag:
        # No tag requested: schedule one writer task per tag that has
        # pending work.
        leased = self.pull.lease_tasks(0, max_tasks_to_lease)
        scheduled_tags = set()
        for task in leased:
            task_tag = task.tag
            if task_tag not in scheduled_tags:
                scheduled_tags.add(task_tag)
                try:
                    writer_task = RecordIOWriter.create_task_(
                        task_tag, in_past=True)
                    taskqueue.Queue('recordio-writer').add(writer_task)
                    self.response.out.write(
                        "Scheduled write for: %s<br>" % task_tag)
                except (taskqueue.DuplicateTaskNameError,
                        taskqueue.TombstonedTaskError,
                        taskqueue.TaskAlreadyExistsError):
                    self.response.out.write(
                        "Already pending write for: %s<br>" % task_tag)
        if len(leased) == max_tasks_to_lease:
            # A full page came back, so there may be more tasks: have the
            # browser reload this page.
            self.response.out.write(
                "<script type=text/javascript>window.setTimeout(function() {"
                "document.location.reload();"
                "}, 5000);</script>")
        return

    pending = []
    pending_bytes = 0
    all_committed = True
    while True:
        leased = self.pull.lease_tasks_by_tag(
            LEASE_TIME_PER_BATCH, max_tasks_to_lease, tag=tag)
        for task in leased:
            if task.was_deleted:
                # Should never happen.
                continue
            entries = marshal.loads(task.payload)
            entries_bytes = sum(
                recordio_chunks.size(entry) for entry in entries)
            if entries_bytes + pending_bytes >= MAX_WRITE_BATCH_SIZE:
                # Once a commit has failed, later batches are not attempted.
                if all_committed:
                    all_committed = self.commit_batch(tag, pending)
                pending = [(task, entries)]
                pending_bytes = entries_bytes
            else:
                pending_bytes += entries_bytes
                pending.append((task, entries))
        # A short page means the queue holds no more tasks for this tag.
        if len(leased) != max_tasks_to_lease:
            break
    if all_committed:
        all_committed = self.commit_batch(tag, pending)
    if not all_committed:
        raise Exception("RecordIO not completed")