def compare_site_thread_csv(file, progress_var=None, step=100.0):
    """Compare every (old_url, new_url) pair listed in a CSV file.

    Each CSV row is expected to hold the old URL in column 0 and the new
    URL in column 1; rows are dispatched to compare_site_thread() sharing
    one thread pool across all sites.

    file         -- path to the CSV file of URL pairs
    progress_var -- optional GUI progress variable forwarded to workers
    step         -- total progress budget, split evenly across rows
    """
    # Pool size comes from user settings when running with the GUI,
    # otherwise fall back to a fixed default of 20 workers.
    if status["INTERFACE_MODE"]:
        thread_pool_csv = ThreadPool(settings["THREADPOOL_SIZE"])
    else:
        thread_pool_csv = ThreadPool(20)

    # Read the whole CSV once under a context manager.  The original code
    # opened the file twice (once just to count rows) and leaked handles
    # on early return; it also divided by zero on an empty file.
    with open(file, 'r') as f:
        rows = list(csv.reader(f))

    if rows:
        site_step = step / len(rows)  # progress share per site
        for row in rows:
            compare_site_thread(row[0], row[1], progress_var=progress_var,
                                step=site_step,
                                thread_pool_csv=thread_pool_csv)
            # Abort early when the GUI requested a stop.  NOTE(review):
            # matching the original, the pool is deliberately not drained
            # or destroyed on this path.
            if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
                return

    thread_pool_csv.wait_completion()
    thread_pool_csv.destroy()
def add_review_record():
    """Queue review scraping for every hotel found on TripAdvisor.

    Requires module-level state populated by read_from_mongoDB():
    hotel_name_list (names supplied by the user), hotel_name and
    hotel_url (names/urls discovered on the site), and the hotel_counter
    cursor.  Only proceeds when the discovered list matches the provided
    list in length.
    """
    global hotel_counter
    read_from_mongoDB("hotel_listing")
    print("No. of hotel in name list provided:", len(hotel_name_list))
    print("No. of hotel url found on TripAdvisor website:", len(hotel_name))
    if len(hotel_name) == len(hotel_name_list):
        print('Hotel list matched.')
        pool = ThreadPool(REVIEW_THREADS)
        while hotel_counter < len(hotel_name):
            print('Hotel no.:', hotel_counter + 1)
            print(hotel_name[hotel_counter])
            print(hotel_url[hotel_counter])
            try:
                # map() expects an iterable of arguments, hence the
                # single-element list around the current url.
                param1 = [hotel_url[hotel_counter]]
                pool.map(get_hotel_review, param1)
                pool.wait_completion()
            except Exception:
                # Was a bare `except:` that also skipped the counter
                # increment, so a persistent failure spun forever on the
                # same hotel.  Narrowed the catch and moved the increment
                # below so a failing hotel is logged and skipped.
                print("Error: unable to queue for get_hotel_review")
            hotel_counter += 1
    else:
        print('Hotel list mismatched.')
def main():
    """Log in once, then fan result downloads out to a thread pool."""
    app_name = sys.argv[0]  # kept for the (disabled) file-based log handler below
    logging.basicConfig(
        # filename = './log/' + app_name + '_' + time.strftime("%Y%m%d_%H%M%S") + '.log',
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
        level=logging.INFO,
    )
    logger = logging.getLogger(sys._getframe().f_code.co_name)
    logger.info('Started')

    parseArgs()

    logger.debug('requests.session')
    session = requests.session()
    # Retry transient connection failures instead of dying on the first one.
    session.mount("http://", requests.adapters.HTTPAdapter(max_retries=10))

    logger.debug('session.post')
    r = session.post(LOGIN_URL, data=LOGIN_PARAM)

    last_id = idResult
    first_id = idResult - quantResult

    # Worker pool drains the download queue with 10 threads.
    logger.debug('ThreadPool')
    pool = ThreadPool(10)

    logger.debug('for idAtual in xrange(%d, %d, -1)' % (last_id, first_id))
    for current_id in xrange(last_id, first_id, -1):
        # Enqueue one download task per result id, newest first.
        pool.add_task(downloadResult, session, current_id)

    # Block until every queued download has finished.
    pool.wait_completion()

    logger.info('Finished')
def main(condicion_venta, estado, threads):
    """Scrape ISP drug listings matching a sale condition and status.

    condicion_venta / estado arrive as dash-separated CLI strings and are
    resolved to their enum members; `threads` sizes the worker pool.
    Returns 1 on bad arguments, None on success.
    """
    start = datetime.now()
    try:
        condicion_venta = CondicionVenta[condicion_venta.replace('-', '_')]
        estado = Estado[estado.replace('-', '_')]
        max_threads = int(threads)
        print('Parámetros de búsqueda')
        print('Venta : {0}'.format(condicion_venta.value))
        print('Vigente: {0}'.format(estado.value))
    except KeyError:
        # Unknown enum key — the CLI string did not map to a member.
        print(
            'No fue posible determinar la condicion de venta o estado de medicamentos a procesar'
        )
        return 1
    except ValueError:
        # `threads` was not an integer.
        print('No se proporcionó un número de hilos de ejecución válido')
        return 1

    # A throwaway parser instance is only used to discover the page count.
    probe = IspParser(sale_terms=condicion_venta, status=estado)
    total_pages = probe.pages_count

    pool = ThreadPool(max_threads, IspParser)
    for page in range(1, total_pages + 1):
        pool.add_task({
            'sale_terms': condicion_venta,
            'status': estado,
            'page_number': page
        })
    pool.wait_completion()

    end = datetime.now()
    print('Tiempo transcurrido: {0}'.format(end - start))
    def __scanfile(self, file):
        # Scan a single file with the shared module list, profile and pool.
        scanner = FileScanner(file, self.outDir, self.modules, self.profile,
                              self.pool)
        scanner.scan()

# Script entry point: scan a directory tree or a single file, then
# optionally grep every produced report for a pattern.
if __name__ == '__main__':
    pool = ThreadPool(MAX_THREADS)
    if options.inputDir:
        # Directory mode: DirScanner walks the tree and feeds the pool.
        scanner = DirScanner(options.inputDir, options.outputDir, MODULES,
                             options.profile, pool)
        scanner.scan()
    else:
        # Single-file mode.
        scanner = FileScanner(options.inputFile, options.outputDir, MODULES,
                              options.profile, pool)
        scanner.scan()
    pool.wait_completion()
    #Search all results if required
    if options.search:
        # 'IP' is a shorthand for a dotted-quad IPv4 regex; anything else
        # is treated as a literal regex supplied by the user.
        if options.search == 'IP':
            pattern = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")
        else:
            pattern = re.compile(options.search)
        ofile = open (os.path.join(options.outputDir, 'SearchHits.txt'), 'w+')
        for root, dirs, files in walk(options.outputDir):
            for fname in files:
                # Skip the hits file itself (Python 2 `<>` inequality).
                if fname <> 'SearchHits.txt':
                    ifile = open(os.path.join(root, fname), 'r')
                    line = 0
                    # NOTE(review): the body of this loop (the actual
                    # pattern match / hit reporting) is truncated in this
                    # chunk — only the line counter is visible here.
                    for text in ifile.readlines():
                        line = line + 1
def matchAll(phase, suffix):
    """Match 101meta metadata rules against every file in 101repo.

    Walks the contributions tree, dispatching one handleFile task per
    file to a small thread pool.  Results and statistics are accumulated
    in module-level globals (handleFile mutates them) and summarised in
    the returned dict `mr`.  `phase` selects the matching phase
    ("basics", "predicates", "fragments"); `suffix` is forwarded to
    handleFile.
    """
    # All accumulators are module globals because the pool workers
    # (handleFile) update them concurrently.
    global basics
    global rules
    global matches
    global failures
    global predicates
    global locators
    global noFiles
    global noFilesAffected
    global noUnits
    global noPatternConstraints
    global noPatternConstraintsOk
    global noContentConstraints
    global noContentConstraintsOk
    global noPredicateConstraints
    global noPredicateConstraintsOk
    global noFragments
    # Basics are precomputed by an earlier "basics" phase run.
    if (phase!="basics"):
        basics = tools101.getBasics()
    rules = json.load(open(const101.rulesDump, 'r'))["results"]["rules"]
    # Reset all accumulators for this phase.
    matches = list()
    failures = list()
    predicates = set()
    locators = set()
    noFiles = 0
    noUnits = 0
    noFilesAffected = 0
    noPatternConstraints = 0
    noPatternConstraintsOk = 0
    noContentConstraints = 0
    noContentConstraintsOk = 0
    noPredicateConstraints = 0
    noPredicateConstraintsOk = 0
    noFragments = 0
    pool = ThreadPool(4)
    print "Matching 101meta metadata on 101repo (phase \"" + str(phase)+ "\")."
    for root, dirs, files in os.walk(os.path.join(const101.sRoot, "contributions"), followlinks=True):
        # Skip the repository's own .git tree.
        if not root.startswith(os.path.join(const101.sRoot, ".git")+os.sep):
            for basename in files:
                noFiles += 1
                if not basename in [".gitignore"]:
                    # Path of the containing dir relative to the repo root.
                    dirname = root[len(const101.sRoot)+1:]
                    pool.add_task(handleFile, phase, dirname, basename, suffix)
                    #handleFile(phase, dirname, basename, suffix)
    sys.stdout.write('\n')
    pool.wait_completion()
    # Assemble the result summary from the accumulated globals.
    mr = dict()
    mr["matches"] = matches
    mr["failures"] = failures
    mr["rules"] = rules
    if phase=="predicates":
        mr["predicates"] = list(predicates)
    if phase=="fragments":
        mr["locators"] = list(locators)
    print str(noFiles) + " files examined."
    print str(noFilesAffected) + " files affected."
    print str(len(failures)) + " failures encountered."
    print str(noUnits) + " metadata units attached."
    print str(noContentConstraints) + " content constraints checked."
    print str(noContentConstraintsOk) + " content constraints succeeded."
    print str(noPatternConstraints) + " filename-pattern constraints checked."
    print str(noPatternConstraintsOk) + " filename-pattern constraints succeeded."
    if phase=="predicates":
        print str(noPredicateConstraints) + " predicate constraints checked."
        print str(noPredicateConstraintsOk) + " predicate constraints succeeded."
    if phase=="fragments":
        print str(len(locators)) + " fragment locators exercised."
        print str(noFragments) + " fragment descriptions checked."
    return mr
    def runHPCEvent(self):
        """Run an event-service job on an HPC resource.

        Prepares the HPC job, fetches event ranges from PanDA (capped at
        defRes['max_events']), submits via HPCManager, and polls until
        completion while staging out finished outputs through a thread
        pool.  Failed stage-outs are retried twice with progressively
        smaller pools.  Progress is reported to the pilot server and the
        PanDA server throughout.
        """
        tolog("runHPCEvent")
        self.__job.jobState = "running"
        self.__job.setState([self.__job.jobState, 0, 0])
        self.__job.pilotErrorDiag = None
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

        defRes = self.getDefaultResources()
        # copy_input_files arrives as the string 'true'/'false'.
        if defRes['copy_input_files'] == 'true':
            self.__copyInputFiles = True
        else:
            self.__copyInputFiles = False

        status, output, hpcJob = self.prepareHPCJob()
        if status == 0:
            tolog("HPC Job: %s " % hpcJob)
        else:
            tolog("failed to create the Tag file")
            self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
            return

        self.__hpcStatus = None
        self.__hpcLog = None

        # Reuse the pilot's log file for the HPC manager when available.
        logFileName = None
        tolog("runJobHPCEvent.getPilotLogFilename=%s"% self.getPilotLogFilename())
        if self.getPilotLogFilename() != "":
            logFileName = self.getPilotLogFilename()
        hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)
        self.__hpcManager = hpcManager
        self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
        self.__job.setMode(self.HPCMode)
        self.__job.setHpcStatus('waitingResource')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        hpcManager.getFreeResources(defRes)
        self.__job.coreCount = hpcManager.getCoreCount()
        self.__job.setHpcStatus('gettingEvents')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        # Ask for the smaller of what the manager wants and max_events.
        numRanges = hpcManager.getEventsNumber()
        tolog("HPC Manager needs events: %s, max_events: %s; use the smallest one." % (numRanges, defRes['max_events']))
        if numRanges > int(defRes['max_events']):
            numRanges = int(defRes['max_events'])
        eventRanges = self.getEventRanges(numRanges=numRanges)
        #tolog("Event Ranges: %s " % eventRanges)
        if len(eventRanges) == 0:
            tolog("Get no Event ranges. return")
            return
        # Track per-range state locally; 'new' = not yet processed.
        for eventRange in eventRanges:
            self.__eventRanges[eventRange['eventRangeID']] = 'new'

        # setup stage out
        self.setupStageOutHPCEvent()

        hpcManager.initJob(hpcJob)
        hpcManager.initEventRanges(eventRanges)
        hpcManager.submit()
        threadpool = ThreadPool(defRes['stageout_threads'])

        old_state = None
        time_start = time.time()
        while not hpcManager.isFinished():
            state = hpcManager.poll()
            self.__job.setHpcStatus(state)
            # Report on state changes, or at least every 10 minutes.
            if old_state is None or old_state != state or time.time() > (time_start + 60*10):
                old_state = state
                time_start = time.time()
                tolog("HPCManager Job stat: %s" % state)
                self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
                rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
                self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
            if state and state == 'Complete':
                break
            # Stage out whatever outputs are ready while the job runs.
            outputs = hpcManager.getOutputs()
            for output in outputs:
                #self.stageOutHPCEvent(output)
                threadpool.add_task(self.stageOutHPCEvent, output)
            time.sleep(30)
            self.updateHPCEventRanges()

        tolog("HPCManager Job Finished")
        self.__job.setHpcStatus('stagingOut')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        # Drain any outputs produced after the poll loop ended.
        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)

        self.updateHPCEventRanges()
        threadpool.wait_completion()
        self.updateHPCEventRanges()

        # Retry 1: half-size pool (at least one thread).
        # NOTE(review): `/` is integer division only under Python 2 here.
        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 1")
            half_stageout_threads = defRes['stageout_threads'] / 2
            if half_stageout_threads < 1:
                half_stageout_threads = 1
            threadpool = ThreadPool(half_stageout_threads)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        # Retry 2: single-threaded, last chance.
        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 2")
            threadpool = ThreadPool(1)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        self.__job.setHpcStatus('finished')
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
        self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
        tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
def main(argv):
    """Analyse author knowledge per file for every configured repository.

    -u True runs an in-place update (synchronous, no DB consumer thread);
    otherwise work is queued to a thread pool and written through a
    consumerDatabaseThreadManager.  Supports git and hg repositories.
    """
    #Parse commandline arguments
    #http://www.tutorialspoint.com/python/python_command_line_arguments.htm
    update_existing = False
    try:
        opts, args = getopt.getopt(argv,"h:u:")
    except getopt.GetoptError:
        print "run_processing.py -u <True/False>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print "run_processing.py -u <True/False>"
        elif opt == "-u":
            if arg == "True":
                update_existing = True

    # Every directory under VERSION_CONTROL_REPOS is a candidate project.
    projects = [d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS) if os.path.isdir(os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))]

    if update_existing:
        print "Running in-place update."
        manager = None
    else:
        # Consumer thread that serialises writes to the database.
        manager = consumerDatabaseThreadManager("knowledge")
    pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

    tip_hashes = {}   # cache of head/tip hash per project
    all_files = []
    last_line = []
    #iterate through all projects
    for project in projects:
        if project in devknowledge.settings.PROJECT_FOLDERS:
            all_files, last_lines = Util.returnFilesLastLine(project)

            if update_existing:
                #there were issues with threads duplicating authors/files so we just mass add them at the beginning
                print "Starting initial processing of authors/files."
                Git.createFilesAuthorsInIndex(project, all_files)
                print "Finished creating authors/files."

            number_to_process = len(all_files)
            print "Number of files to process: ", number_to_process
            processed_files = 0
            for i in range(0, len(all_files)):
                file = all_files[i]
                last_line = last_lines[i]
                print "Project: ", project, " ", file, " number lines: ", last_line
                project_type = Util.returnProjectType(devknowledge.settings.VERSION_CONTROL_REPOS+project)
                if project_type == "git":
                    #run Git analysis
                    if project not in tip_hashes:
                        #create new head hash cache
                        tip_hashes[project] = Util.returnHeadHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)
                    if update_existing:
                        # In-place mode runs synchronously.
                        Git.storeKnowledge(manager, project, file, 1, last_line, update_existing, tip_hashes[project])
                    else:
                        pool.add_task(Git.storeKnowledge, manager, project, file, 1, last_line, update_existing, tip_hashes[project])
                elif project_type == "hg":
                    #run Mercurial analysis
                    if project not in tip_hashes:
                        #create new tip hash cache
                        tip_hashes[project] = Mercurial.returnTipHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)
                    pool.add_task(Mercurial.storeKnowledge, manager, project, file, 1, last_line, tip_hashes[project])
                processed_files += 1
                print "Percent done: %.2f %%" % float(float(processed_files)/float(number_to_process) * 100)
                Util.checkDatabaseFolderSize()

    print "Finishing up writing data to database."
    pool.wait_completion()

    if update_existing:
        #prune database of stale file and author nodes
        # NOTE(review): relies on `project_type`/`project`/`all_files`
        # leaking from the last loop iteration; raises NameError if no
        # project matched PROJECT_FOLDERS — confirm intended.
        print "Starting prune of database."
        if project_type == "git":
            Git.pruneDatabaseStaleFiles(all_files, project)
            Git.pruneDatabaseStaleAuthors()
            Git.pruneAllOtherNodes()
        elif project_type == "hg":
            print "Not yet implemented."

    if manager:
        manager.markForFinish()
    print "Done. Exiting."
def compare_site_thread(old_url, new_url, progress_var=None, step=100.0, thread_pool_csv=None):
    """Compare an old site against its migrated copy, page by page.

    Normalises both URLs, discovers the old site's sitemap and blog,
    then queues per-page comparison tasks on a thread pool.  When
    thread_pool_csv is given (multi-site CSV mode) the shared pool is
    used and left running; otherwise a private pool is created, drained
    and destroyed.  Returns True on success, False when no sitemap was
    found, -1 when the new site requires login, or None on early abort.
    """
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    # checking multiple sites mode
    if thread_pool_csv:
        thread_pool = thread_pool_csv
    else:
        thread_pool = ThreadPool(settings["THREADPOOL_SIZE"])
    create_path()
    ind = 0  # NOTE(review): incremented below but otherwise unused
    old_url = old_url.strip()
    new_url = new_url.strip()
    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]
    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url
    # print out the information for old and new sites
    entry_print("-----------------------------------------------------", True)
    entry_print("Old URL: " + old_url, True)
    entry_print("New URL: " + new_url, True)
    entry_print("-----------------------------------------------------", True)
    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # check if the new site needs login
    new_test = get_soup(new_url)
    if new_test:
        title = new_test.find("title")
        if title and title.get_text().strip() == "Login":
            entry_print(
                "New site needs login. Please use login mode to check this site!\n",
                True)
            return -1
    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # get the subpages of old and new sites
    try:
        sites = get_sites(old_url)
    except AttributeError:
        entry_print(
            "Can't find the site map from " + old_url + ". Please check if the url is valid!", True)
        thread_pool.destroy()
        return
    old_blog = get_blog_site(old_url)
    new_blog = get_blog_site(new_url)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        thread_pool.destroy()
        return
    blog_exists = False
    if old_blog and new_blog:
        blog_exists = True
    # if urls for subpages are not found
    if sites is None:
        record_error(new_url, "sites")
        if progress_var:
            progress_var.set(progress_var.get() + step)
        return False
    # if blog page is not found on one side only, record the mismatch
    if old_blog is not None and new_blog is None:
        record_error(new_url, "blog")
    elif old_blog is None and new_blog is not None:
        record_error(old_url, "blog")
    setup_step = step * 0.02
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)
    # print out site information
    entry_print("Site Information: ", True)
    # calculate the step for each page (96% of the remaining budget,
    # halved when a blog also has to be checked)
    step *= 0.96
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
        entry_print("Old Blog: " + old_blog, True)
        entry_print("New Blog: " + new_blog, True)
    else:
        page_step = step / (len(sites) + 1)
    entry_print("Number of non-blog pages: " + str(len(sites)), True)
    # check the homepage
    thread_pool.add_task(compare_homepage, old_url=old_url, new_url=new_url,
                         progress_var=progress_var, step=page_step)
    # check all the sites in sitemap
    for site in sites:
        ind += 1
        if site.startswith("/home") or site.startswith("/main"):
            continue
        old_link = old_url + site
        new_link = new_url + site
        thread_pool.add_task(compare_page, old_url=old_link, new_url=new_link,
                             progress_var=progress_var, step=page_step)
    # check all the blog pages
    if blog_exists:
        old_blog_soup = get_soup(old_blog)
        new_blog_soup = get_soup(new_blog)
        compare_blog(old_blog_soup, new_blog_soup, old_blog, new_blog,
                     progress_var=progress_var, step=step / 2)
    # single site mode: drain and dispose of the private pool
    if not thread_pool_csv:
        thread_pool.wait_completion()
        thread_pool.destroy()
    entry_print("-----------------------------------------------------\n")
    return True
def main(argv): #Parse commandline arguments #http://www.tutorialspoint.com/python/python_command_line_arguments.htm update_existing = False try: opts, args = getopt.getopt(argv, "h:u:") except getopt.GetoptError: print "run_dependencies.py -u <True/False>" sys.exit(2) for opt, arg in opts: if opt == "-h": print "run_dependencies.py -u <True/False>" elif opt == "-u": if arg == "True": update_existing = True projects = [ d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS) if os.path.isdir( os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d)) ] if update_existing: print "Running in-place update." manager = None else: manager = consumerDatabaseThreadManager("dependencies") pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS) #iterate through all projects for project in projects: if project in devknowledge.settings.PROJECT_FOLDERS: all_files, last_lines = Util.returnFilesLastLine(project) c_files = Util.returnCandCPlusPlusFiles(all_files, project) number_to_process = len(c_files) print "Number of files to process: ", number_to_process processed_files = 0 for file in c_files: last_line = last_lines[processed_files] #print "Project: ", project, " ", file, " number lines: ", last_line if update_existing: FileDependencies.parseFileDependencies( manager, project, file, c_files, update_existing) else: pool.add_task(FileDependencies.parseFileDependencies, manager, project, file, c_files, update_existing) processed_files += 1 print "Percent done: %.2f %%" % float( float(processed_files) / float(number_to_process) * 100) Util.checkDatabaseFolderSize() print "Finishing up writing data to database." pool.wait_completion() if not update_existing: manager.markForFinish() print "Done. Exiting."
    def runHPCEvent(self):
        """Run an event-service job on an HPC resource.

        Prepares the HPC job, pulls event ranges from PanDA (capped at
        defRes['max_events']), submits through HPCManager, then polls
        until completion while staging finished outputs out via a thread
        pool.  Failed stage-outs get two retries with shrinking pools.
        Status is reported to the pilot and PanDA servers throughout.
        """
        tolog("runHPCEvent")
        self.__job.jobState = "running"
        self.__job.setState([self.__job.jobState, 0, 0])
        self.__job.pilotErrorDiag = None
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

        defRes = self.getDefaultResources()
        # copy_input_files is delivered as the string 'true'/'false'.
        if defRes['copy_input_files'] == 'true':
            self.__copyInputFiles = True
        else:
            self.__copyInputFiles = False

        status, output, hpcJob = self.prepareHPCJob()
        if status == 0:
            tolog("HPC Job: %s " % hpcJob)
        else:
            tolog("failed to create the Tag file")
            self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
            return

        self.__hpcStatus = None
        self.__hpcLog = None

        # Reuse the pilot's log file for the HPC manager when available.
        logFileName = None
        tolog("runJobHPCEvent.getPilotLogFilename=%s" % self.getPilotLogFilename())
        if self.getPilotLogFilename() != "":
            logFileName = self.getPilotLogFilename()
        hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)
        self.__hpcManager = hpcManager
        self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
        self.__job.setMode(self.HPCMode)
        self.__job.setHpcStatus('waitingResource')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        hpcManager.getFreeResources(defRes)
        self.__job.coreCount = hpcManager.getCoreCount()
        self.__job.setHpcStatus('gettingEvents')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        # Request the smaller of what the manager wants and max_events.
        numRanges = hpcManager.getEventsNumber()
        tolog(
            "HPC Manager needs events: %s, max_events: %s; use the smallest one." % (numRanges, defRes['max_events']))
        if numRanges > int(defRes['max_events']):
            numRanges = int(defRes['max_events'])
        eventRanges = self.getEventRanges(numRanges=numRanges)
        #tolog("Event Ranges: %s " % eventRanges)
        if len(eventRanges) == 0:
            tolog("Get no Event ranges. return")
            return
        # Track per-range state locally; 'new' = not yet processed.
        for eventRange in eventRanges:
            self.__eventRanges[eventRange['eventRangeID']] = 'new'

        # setup stage out
        self.setupStageOutHPCEvent()

        hpcManager.initJob(hpcJob)
        hpcManager.initEventRanges(eventRanges)
        hpcManager.submit()
        threadpool = ThreadPool(defRes['stageout_threads'])

        old_state = None
        time_start = time.time()
        while not hpcManager.isFinished():
            state = hpcManager.poll()
            self.__job.setHpcStatus(state)
            # Report on state change, or at least every 10 minutes.
            if old_state is None or old_state != state or time.time() > (
                    time_start + 60 * 10):
                old_state = state
                time_start = time.time()
                tolog("HPCManager Job stat: %s" % state)
                self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
                rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
                self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
            if state and state == 'Complete':
                break
            # Stage out whatever outputs are ready while the job runs.
            outputs = hpcManager.getOutputs()
            for output in outputs:
                #self.stageOutHPCEvent(output)
                threadpool.add_task(self.stageOutHPCEvent, output)
            time.sleep(30)
            self.updateHPCEventRanges()

        tolog("HPCManager Job Finished")
        self.__job.setHpcStatus('stagingOut')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        # Drain any outputs produced after the poll loop ended.
        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)

        self.updateHPCEventRanges()
        threadpool.wait_completion()
        self.updateHPCEventRanges()

        # Retry 1: half-size pool (at least one thread).
        # NOTE(review): `/` is integer division only under Python 2 here.
        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 1")
            half_stageout_threads = defRes['stageout_threads'] / 2
            if half_stageout_threads < 1:
                half_stageout_threads = 1
            threadpool = ThreadPool(half_stageout_threads)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        # Retry 2: single-threaded, last chance.
        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 2")
            threadpool = ThreadPool(1)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        self.__job.setHpcStatus('finished')
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
        self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
        tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
class DroidStager(threading.Thread): def __init__(self, globalWorkingDir, localWorkingDir, outputs=None, job=None, esJobManager=None, outputDir=None, rank=None, logger=None): threading.Thread.__init__(self) self.__globalWorkingDir = globalWorkingDir self.__localWorkingDir = localWorkingDir self.__currentDir = None self.__rank = rank if logger and False: self.__tmpLog = logger else: curdir = _abspath(self.__localWorkingDir) wkdirname = "rank_%s" % str(self.__rank) wkdir = _abspath(_join(curdir, wkdirname)) self.__tmpLog = Logger.Logger( filename=os.path.join(wkdir, 'Droid.log')) self.__job = job self.__esJobManager = esJobManager self.__stop = threading.Event() self.__isFinished = False self.__tmpLog.info("Rank %s: Global working dir: %s" % (self.__rank, self.__globalWorkingDir)) os.environ['PilotHomeDir'] = os.path.dirname(self.__globalWorkingDir) self.__jobId = None self.__copyOutputToGlobal = False self.__outputDir = outputDir self.__hostname = socket.getfqdn() self.__outputs = outputs self.__threadpool = None self.setup(job) def setup(self, job): try: self.__jobId = job.get("JobId", None) self.__yodaToOS = job.get('yodaToOS', False) self.__yodaToZip = job.get('yodaToZip', False) self.__zipFileName = job.get('zipFileName', None) self.__zipEventRangesName = job.get('zipEventRangesName', None) self.__tmpLog.debug("Rank %s: zip file %s" % (self.__rank, self.__zipFileName)) self.__tmpLog.debug("Rank %s: zip event range file %s" % (self.__rank, self.__zipEventRangesName)) if self.__zipFileName is None or self.__zipEventRangesName is None: self.__tmpLog.debug( "Rank %s: either zipFileName(%s) is None or zipEventRanagesName(%s) is None, will not use zip output" % (self.__rank, self.__zipFileName, self.__zipEventRangesName)) self.__yodaToZip = False self.__copyOutputToGlobal = job.get('copyOutputToGlobal', False) if self.__yodaToOS: setup = job.get('setup', None) self.__esPath = job.get('esPath', None) self.__os_bucket_id = job.get('os_bucket_id', None) self.__report = 
getInitialTracingReport( userid='Yoda', sitename='Yoda', dsname=None, eventType="objectstore", analysisJob=False, jobId=None, jobDefId=None, dn='Yoda') self.__siteMover = objectstoreSiteMover(setup, useTimerCommand=False) self.__cores = int(job.get('ATHENA_PROC_NUMBER', 1)) self.__tmpLog.debug("Rank %s: start threadpool" % (self.__rank)) self.__threadpool = ThreadPool(self.__cores / 8) except: self.__tmpLog.error("Failed to setup Droid stager: %s" % str(traceback.format_exc())) def copyOutput(self, output, outputs): if self.__outputDir: for filename in outputs: #filename = output.split(",")[0] base_filename = os.path.basename(filename) new_file_name = os.path.join(self.__outputDir, base_filename) is_copied = False try: os.rename(filename, new_file_name) is_copied = True except: self.__tmpLog.debug( "Rank %s: failed to move output %s to %s, %s" % (self.__rank, filename, new_file_name, str(traceback.format_exc()))) is_copied = False if not is_copied: shutil.copy(filename, new_file_name) os.remove(filename) output = output.replace(filename, new_file_name) return 0, output elif self.__copyOutputToGlobal: for filename in outputs: #filename = output.split(",")[0] base_filename = os.path.basename(filename) new_file_name = os.path.join(self.__globalWorkingDir, base_filename) is_copied = False try: os.rename(filename, new_file_name) is_copied = True except: self.__tmpLog.debug( "Rank %s: failed to move output %s to %s, %s" % (self.__rank, filename, new_file_name, str(traceback.format_exc()))) is_copied = False if not is_copied: shutil.copy(filename, new_file_name) os.remove(filename) output = output.replace(filename, new_file_name) return 0, output else: if self.__localWorkingDir == self.__globalWorkingDir: return 0, output for filename in outputs: #filename = output.split(",")[0] new_file_name = filename.replace(self.__localWorkingDir, self.__globalWorkingDir) dirname = os.path.dirname(new_file_name) if not os.path.exists(dirname): os.makedirs(dirname) 
shutil.copy(filename, new_file_name) os.remove(filename) output = output.replace(filename, new_file_name) return 0, output def stageOutToOS(self, outputs): ret_status = 0 ret_outputs = [] try: for filename in outputs: ret_status, pilotErrorDiag, surl, size, checksum, arch_type = self.__siteMover.put_data( filename, self.__esPath, lfn=os.path.basename(filename), report=self.__report, token=None, experiment='ATLAS') if ret_status == 0: os.remove(filename) ret_outputs.append(surl) else: self.__tmpLog.debug("Failed to stageout %s: %s %s" % (filename, ret_status, pilotErrorDiag)) return ret_status, pilotErrorDiag except: self.__tmpLog.warning( "Rank %s: Droid throws exception when staging out: %s" % (self.__rank, traceback.format_exc())) ret_status = -1 return ret_status, ret_outputs def zipOutputs(self, eventRangeID, eventStatus, outputs): try: for filename in outputs: command = "tar -rf " + self.__zipFileName + " --directory=%s %s" % ( os.path.dirname(filename), os.path.basename(filename)) status, ret = commands.getstatusoutput(command) if status: self.__tmpLog.debug("Failed to zip %s: %s, %s" % (filename, status, ret)) return status, ret else: os.remove(filename) except: self.__tmpLog.warning( "Rank %s: Droid throws exception when zipping out: %s" % (self.__rank, traceback.format_exc())) return -1, "Failed to zip outputs" else: handler = open(self.__zipEventRangesName, "a") handler.write("%s %s %s\n" % (eventRangeID, eventStatus, outputs)) handler.close() return 0, outputs def stageOut(self, eventRangeID, eventStatus, output, retries=0): if eventStatus.startswith("ERR"): request = { "eventRangeID": eventRangeID, 'eventStatus': eventStatus, "output": output } else: outputs = output.split(",")[:-3] if self.__yodaToZip: self.__tmpLog.debug("Rank %s: start to zip outputs: %s" % (self.__rank, outputs)) retStatus, retOutput = self.zipOutputs(eventRangeID, eventStatus, outputs) if retStatus != 0: self.__tmpLog.error( "Rank %s: failed to zip outputs %s: %s" % (self.__rank, 
outputs, retOutput)) request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': eventStatus, "output": output } else: self.__tmpLog.info( "Rank %s: finished to zip outputs %s: %s" % (self.__rank, outputs, retOutput)) request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': 'zipped', "output": retOutput } elif self.__yodaToOS: self.__tmpLog.debug( "Rank %s: start to stage out outputs to objectstore: %s" % (self.__rank, outputs)) retStatus, retOutput = self.stageOutToOS(outputs) if retStatus != 0: self.__tmpLog.error( "Rank %s: failed to stagout outputs %s to objectstore: %s" % (self.__rank, outputs, retOutput)) if retries < 1: self.stageOut(eventRangeID, eventStatus, output, retries=retries + 1) request = None else: request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': eventStatus, "output": output } else: self.__tmpLog.info( "Rank %s: finished to stageout outputs %s to objectstore: %s" % (self.__rank, outputs, retOutput)) request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': 'stagedOut', "output": retOutput, 'objstoreID': self.__os_bucket_id } else: self.__tmpLog.debug("Rank %s: start to copy outputs: %s" % (self.__rank, outputs)) retStatus, retOutput = self.copyOutputs(output, outputs) if retStatus != 0: self.__tmpLog.error( "Rank %s: failed to copy outputs %s: %s" % (self.__rank, outputs, retOutput)) request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': eventStatus, "output": output } else: self.__tmpLog.info( "Rank %s: finished to copy outputs %s: %s" % (self.__rank, outputs, retOutput)) request = { "jobId": self.__jobId, "eventRangeID": eventRangeID, 'eventStatus': eventStatus, "output": retOutput } if request: self.__outputs.put(request) def stop(self): self.__stop.set() def isFinished(self): return self.__isFinished def run(self): while True: try: outputs = self.__esJobManager.getOutputs() if outputs: self.__tmpLog.debug("Rank %s: 
getOutputs: %s" % (self.__rank, outputs)) for outputMsg in outputs: try: eventRangeID, eventStatus, output = outputMsg if self.__threadpool: self.__tmpLog.debug( "Rank %s: add event output to threadpool: %s" % (self.__rank, outputMsg)) self.__threadpool.add_task(self.stageOut, eventRangeID, eventStatus, output, retries=0) else: self.stageOut(eventRangeID, eventStatus, output, retries=0) except: self.__tmpLog.warning( "Rank %s: error message: %s" % (self.__rank, traceback.format_exc())) continue except: self.__tmpLog.error("Rank %s: Stager Thread failed: %s" % (self.__rank, traceback.format_exc())) if self.__stop.isSet(): if self.__threadpool: self.__tmpLog.warning( "Rank %s: wait threadpool to finish" % (self.__rank)) self.__threadpool.wait_completion() self.__tmpLog.warning("Rank %s: threadpool finished" % (self.__rank)) break time.sleep(1) self.__isFinished = True
d = randrange(1, 10) # sleep up to 10 sec try: logger.info('Adding task: sleep for %d secs (task %d)', d, task_num) pool.add_task(user, wait_delay, d, task_num) real_time_for_jobs += d except Exception as e: logging.warning(e) task_num += 1 time.sleep(delay) logger.info('user %s submitted jobs for %d secs', user, real_time_for_jobs) # 1) Init a Thread pool with the desired number of threads and number of users pool = ThreadPool(num_of_process, num_of_users, sched_policy, queue_policy) producers = [] for user in range(num_of_users): base_time = 20.0 # the slowest user delay = base_time / (user + 1) p = Process(target=producer, args=(pool, user, delay,)) producers.append(p) p.start() # 3) Wait for completion [p.join() for p in producers] pool.kill() pool.wait_completion() pool.scheduler.print_statistics()