import os
import random
import sys
import time
from threading import Thread

import submit_a_task


def _do_execute_job(self, jid):
    # TODO need to specify which bag??
    job_id = int(jid)
    # poll forever: pick up newly queued bag files every 2 seconds
    while True:
        if len(self.jobs[job_id]) == 0:
            time.sleep(2)
        else:
            while len(self.jobs[job_id]) > 0:
                self.bags[job_id] += 1
                bag_id = self.bags[job_id]
                jb_key = "%d.%d" % (job_id, bag_id)
                self.tf_job_dict[jb_key] = {}
                bag_path = self.jobs[job_id].popleft()
                lines = open(bag_path, 'r').readlines()
                line = 0
                for l in lines:
                    # callback function that needs to state whether a task is done
                    submit_a_task.submit_a_task(job_id, bag_id, line, l, [])
                    line += 1
                    print l
                # bookkeeping for this bag
                self.tf_job_dict[jb_key]['SamplingReady'] = False
                self.tf_job_dict[jb_key]['CompletedTasks'] = 0
                self.tf_job_dict[jb_key]['TotalTasks'] = line
                self.tf_job_dict[jb_key]['SubmittedTasks'] = line
                self.tf_dict['submitted_tasks'] += self.tf_job_dict[jb_key]['SubmittedTasks']
                self.tf_dict['job_dict'] = self.tf_job_dict
                Thread(target=self._do_poll,
                       args=[job_id, self.bags[job_id]]).start()
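# _do_poll is started in a Thread above but not shown in this section. A
# minimal sketch of what it might look like, ASSUMING the completion counter
# in tf_job_dict is incremented elsewhere (e.g. by the submit_a_task
# callback); the 2-second poll interval mirrors _do_execute_job. This is a
# hypothetical illustration (note the _sketch suffix), not the actual
# implementation.
def _do_poll_sketch(self, job_id, bag_id):
    jb_key = "%d.%d" % (job_id, bag_id)
    while True:
        entry = self.tf_job_dict[jb_key]
        if entry['CompletedTasks'] >= entry['TotalTasks']:
            # all tasks of this bag are done; flag it so callers of
            # sample_job can collect the run-times
            entry['SamplingReady'] = True
            break
        time.sleep(2)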
def sample_job(self, job_id):
    # TODO set up a thread for collecting all info when all tasks are finished
    if job_id not in self.jobs:
        return -1
    # TODO check if a bag has already been sampled, and refuse to sample it again
    bag_id = 0  # ALWAYS when sampling
    jb_key = "%d.%d" % (job_id, bag_id)
    # initialize the entry before reading its flags (reading first raised a
    # KeyError on the very first call)
    if jb_key not in self.tf_job_dict:
        self.tf_job_dict[jb_key] = {}
    elif self.tf_job_dict[jb_key].get('SamplingStarted'):
        if self.tf_job_dict[jb_key].get('SamplingReady'):
            return -3
        return -2
    self.tf_dict['bags'] += 1
    replication_size = 7
    print "Job ID = %d" % job_id
    bag_path = self.jobs[job_id].popleft()
    lines = open(bag_path, 'r').read().splitlines()
    N = len(lines)
    # sample size: n = floor(N * z^2 / (z^2 + 2*(N-1)*e^2)) with z = 1.96, e = 0.2
    size = int((N * 1.96 * 1.96) // ((1.96 * 1.96) + (2 * (N - 1)) * (0.2 * 0.2)))
    print "take %d samples from %d tasks" % (size, N)
    # first: find all available worker types
    type_list = []
    for w in self.registered_workers:
        workertype = self.registered_workers[w].type
        if workertype not in type_list:
            type_list.append(workertype)
    # second: randomly split the tasks (lines) into 2 lists:
    # 1) list for immediate processing, called sampling
    # 2) rest, to be processed at a later stage
    sample_list = list()
    for _ in range(size):
        take = random.randrange(0, N - _)  # random index into the shrinking list
        taken = lines.pop(take)            # remove from original list
        sample_list.append(taken)          # add to sample list
    # third: submit all tasks in separate commands
    self.tf_job_dict[jb_key]['SamplingReady'] = False
    # TODO to use condor more efficiently, create just one ClassAd file
    sys.stdout.flush()
    for i in range(0, size):
        # function that submits on each worker type
        print >> sys.stderr, 'sample_job sampling ', job_id, i, sample_list[i]
        if i < replication_size:
            # to replicate the task on all worker types, use type_list
            submit_a_task.submit_a_task(job_id, bag_id, i, sample_list[i], type_list)
        else:
            submit_a_task.submit_a_task(job_id, bag_id, i, sample_list[i], [])
    # Put all lines that were not yet submitted in a file for later execution,
    # and put the filename "in front of" the queue
    filename_leftovers = "%s/lo-j%d-b%d" % (os.path.dirname(bag_path), job_id, bag_id)
    print "leftovers go in ", filename_leftovers
    fd = open(filename_leftovers, "w")
    # sample tasks were taken away, so just save the remaining lines
    for leftover in lines:
        fd.write(leftover + "\n")
    fd.close()
    self.add_on(filename_leftovers, job_id, False)
    # some administration
    self.tf_job_dict[jb_key]['SamplingStarted'] = True
    self.tf_job_dict[jb_key]['SamplingReady'] = False
    self.tf_job_dict[jb_key]['CompletedTasks'] = 0
    self.tf_job_dict[jb_key]['TotalTasks'] = size
    self.tf_job_dict[jb_key]['SubmittedTasks'] = size + replication_size * (len(type_list) - 1)
    self.tf_dict['submitted_tasks'] += self.tf_job_dict[jb_key]['SubmittedTasks']
    self.tf_dict['job_dict'] = self.tf_job_dict
    # TODO wait for all jobs to complete and return the run-times
    print "Wait for job completion in a Thread"
    sys.stdout.flush()
    Thread(target=self._do_poll, args=[job_id, bag_id]).start()
    # should return list of leftover tasks; for now, return the sample size
    return size
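# The "size" computed in sample_job follows a finite-population sample-size
# formula; the hard-coded constants suggest z = 1.96 (95% confidence) and a
# margin of error e = 0.2. A standalone restatement (hypothetical helper
# name) with a worked example:
def _sample_size(N, z=1.96, e=0.2):
    # n = floor(N * z^2 / (z^2 + 2*(N - 1)*e^2))
    return int((N * z * z) // ((z * z) + (2 * (N - 1)) * (e * e)))

# e.g. N = 1000 tasks: 1000*3.8416 // (3.8416 + 2*999*0.04)
#                    = 3841.6 // 83.7616 -> 45 samples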
def sample_job(self, job_id):
    # TODO set up a thread for collecting all info when all tasks are finished
    if job_id not in self.jobs:
        return -1
    # TODO check if a bag has already been sampled, and refuse to sample it again
    bag_id = 0  # ALWAYS when sampling
    jb_key = "%d.%d" % (job_id, bag_id)
    # initialize the entry before reading its flags (reading first raised a
    # KeyError on the very first call)
    if jb_key not in self.tf_job_dict:
        self.tf_job_dict[jb_key] = {}
    elif self.tf_job_dict[jb_key].get('SamplingStarted'):
        if self.tf_job_dict[jb_key].get('SamplingReady'):
            return -3
        return -2
    self.tf_dict['bags'] += 1
    replication_size = 7
    print "Job ID = %d" % job_id
    bag_path = self.jobs[job_id].popleft()
    lines = open(bag_path, 'r').read().splitlines()
    N = len(lines)
    # sample size with z = 1.96, e = 0.2 (see the worked example above)
    size = int((N * 1.96 * 1.96) // ((1.96 * 1.96) + (2 * (N - 1)) * (0.2 * 0.2)))
    print "take %d samples from %d tasks" % (size, N)
    # signature of the callee:
    # def submit_a_task(jobnr, bagnr, tasknr, commandline, workerlist, thedict={}):
    # first: find all available worker types
    type_list = []
    for w in self.registered_workers:
        workertype = self.registered_workers[w].type
        if workertype not in type_list:
            type_list.append(workertype)
    # second: submit all tasks in separate commands; this variant samples the
    # first `size` lines, whereas the variant above draws them at random
    self.tf_job_dict[jb_key]['SamplingReady'] = False
    # TODO to use condor more efficiently, create just one ClassAd file
    for i in range(0, size):
        # function that submits on each worker type
        print >> sys.stderr, 'sample_job sampling ', job_id, i, lines[i]
        if i < replication_size:
            # to replicate the task on all worker types, use type_list
            submit_a_task.submit_a_task(job_id, bag_id, i, lines[i], type_list)
        else:
            submit_a_task.submit_a_task(job_id, bag_id, i, lines[i], [])
    # Put all lines that were not yet submitted in a file for later execution,
    # and put the filename "in front of" the queue
    filename_leftovers = "%s/lo-j%d-b%d" % (os.path.dirname(bag_path), job_id, bag_id)
    print >> sys.stderr, "leftovers go in ", filename_leftovers
    fd = open(filename_leftovers, "w")
    for i in range(size, N):
        fd.write(lines[i] + "\n")
    fd.close()
    self.add_on(filename_leftovers, job_id, False)
    # some administration
    self.tf_job_dict[jb_key]['SamplingStarted'] = True
    self.tf_job_dict[jb_key]['SamplingReady'] = False
    self.tf_job_dict[jb_key]['CompletedTasks'] = 0
    self.tf_job_dict[jb_key]['TotalTasks'] = size
    self.tf_job_dict[jb_key]['SubmittedTasks'] = size + replication_size * (len(type_list) - 1)
    self.tf_dict['submitted_tasks'] += self.tf_job_dict[jb_key]['SubmittedTasks']
    self.tf_dict['job_dict'] = self.tf_job_dict
    # TODO wait for all jobs to complete and return the run-times
    Thread(target=self._do_poll, args=[job_id, bag_id]).start()
    # should return list of leftover tasks; for now, return the sample size
    return size
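# Worked example of the SubmittedTasks accounting above, assuming that
# submit_a_task submits one task instance per worker type when given a
# non-empty worker list (which is what the formula implies): with size = 45,
# replication_size = 7 and 3 worker types, the first 7 tasks run on all
# 3 types (21 submissions) and the other 38 run once, so
# SubmittedTasks = 45 + 7 * (3 - 1) = 59.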
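# A minimal usage sketch. "JobServer" is a hypothetical name for the
# surrounding class; this section only shows three of its methods, so the
# constructor and the exact semantics of add_on()'s third argument are
# ASSUMED here, not confirmed by the code above.
if __name__ == '__main__':
    server = JobServer()
    job_id = 1
    # queue a bag-of-tasks file; sample_job() submits a statistical sample,
    # writes the leftovers to a "lo-j<job>-b<bag>" file and re-queues that
    # file via add_on() for _do_execute_job() to pick up later
    server.add_on("/path/to/bag.txt", job_id, False)
    n = server.sample_job(job_id)
    if n < 0:
        print "sampling refused (code %d)" % n
    else:
        print "submitted %d sample tasks" % n
        # process the leftover bags as they become ready
        Thread(target=server._do_execute_job, args=[job_id]).start()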