Example #1
def compare_site_thread_csv(file, progress_var=None, step=100.0):
    if status["INTERFACE_MODE"]:
        thread_pool_csv = ThreadPool(settings["THREADPOOL_SIZE"])
    else:
        thread_pool_csv = ThreadPool(20)

    # count the rows first so that each site gets an equal share of the step
    with open(file, 'r') as f:
        row_count = sum(1 for row in f)
    site_step = step / row_count

    with open(file, 'r') as f:
        for row in csv.reader(f):
            compare_site_thread(row[0],
                                row[1],
                                progress_var=progress_var,
                                step=site_step,
                                thread_pool_csv=thread_pool_csv)
            # stop early if the user cancelled the check from the interface
            if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
                return

    thread_pool_csv.wait_completion()
    thread_pool_csv.destroy()
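None of the listings include the pool class itself. Judging by the calls they make (add_task, map, wait_completion, and in some cases destroy), they all rely on some variant of the classic Queue-based thread pool recipe. A minimal sketch of that assumed interface, for reference; everything beyond the method names is an assumption:

from queue import Queue  # the Python 2 examples below would use: from Queue import Queue
from threading import Thread

class Worker(Thread):
    """Consume tasks from the shared queue forever."""
    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        self.daemon = True  # let the process exit even if workers are idle
        self.start()

    def run(self):
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            except Exception as exc:
                print(exc)  # real implementations log or collect failures
            finally:
                self.tasks.task_done()

class ThreadPool:
    def __init__(self, num_threads):
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def add_task(self, func, *args, **kwargs):
        self.tasks.put((func, args, kwargs))

    def map(self, func, args_list):
        for args in args_list:
            self.add_task(func, args)

    def wait_completion(self):
        self.tasks.join()  # block until every queued task is processed
        # destroy() / kill() in some examples below are extensions of this recipe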
Example #2
def add_review_record():

    global hotel_counter

    read_from_mongoDB("hotel_listing")
    print("No. of hotel in name list provided:", len(hotel_name_list))
    print("No. of hotel url found on TripAdvisor website:", len(hotel_name))

    if len(hotel_name) == len(hotel_name_list):
        print('Hotel list matched.')
        # for i in hotel_url:
        #     get_hotel_review(i)
        pool = ThreadPool(REVIEW_THREADS)
        while hotel_counter < len(hotel_name):
            print('Hotel no.:', hotel_counter + 1)
            print(hotel_name[hotel_counter])
            print(hotel_url[hotel_counter])
            try:
                param1 = [hotel_url[hotel_counter]]
                pool.map(get_hotel_review, param1)
                pool.wait_completion()
                #get_hotel_review(hotel_url[hotel_counter])
                hotel_counter += 1
            except Exception as exc:
                # note: the counter is not advanced on failure, so the same
                # hotel is retried on the next iteration
                print("Error: unable to queue for get_hotel_review:", exc)
    else:
        print('Hotel list mismatched.')
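Since param1 always holds exactly one URL, the map/wait_completion pair queues and immediately drains a single task per iteration, so hotels are effectively processed one at a time. With a queue-based pool like the sketch after Example #1, the hedged one-task equivalent is:

pool.add_task(get_hotel_review, hotel_url[hotel_counter])
pool.wait_completion()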
Example #3
def main():
	appName = sys.argv[0]
	logging.basicConfig(
#		filename = './log/' + appName + '_' + time.strftime("%Y%m%d_%H%M%S") + '.log',
		datefmt = '%Y-%m%d %H:%M:%S',
		format = '%(asctime)s | %(levelname)s | %(name)s | %(message)s',
		level = logging.INFO
	)
	func_name = sys._getframe().f_code.co_name
	logger = logging.getLogger(func_name)
	logger.info('Started')

	parseArgs()

	logger.debug('requests.session')
	session = requests.session()
	# `mount` a custom adapter that retries failed connections, for both HTTP and HTTPS requests
	session.mount("http://", requests.adapters.HTTPAdapter(max_retries=10))
	session.mount("https://", requests.adapters.HTTPAdapter(max_retries=10))

	logger.debug('session.post')
	r = session.post(LOGIN_URL, data = LOGIN_PARAM)

	idResultEnd = idResult
	idResultBegin = idResult - quantResult
	# 1) Init a Thread pool with the desired number of threads
	logger.debug('ThreadPool')
	pool = ThreadPool(10)
	logger.debug('for idAtual in xrange(%d, %d, -1)' % (idResultEnd, idResultBegin))
	for idAtual in xrange(idResultEnd, idResultBegin, -1):
		# 2) Add the task to the queue
		pool.add_task(downloadResult, session, idAtual)
	# 3) Wait for completion
	pool.wait_completion()
	###
	logger.info('Finished')
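A plain integer max_retries only retries failed connections. On newer requests/urllib3 versions, a urllib3 Retry object also covers HTTP status codes and backoff; a hedged sketch, not the original author's code:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.session()
retries = Retry(total=10, backoff_factor=0.5,
                status_forcelist=(500, 502, 503, 504))
adapter = HTTPAdapter(max_retries=retries)
session.mount("http://", adapter)
session.mount("https://", adapter)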
Example #4
def main(condicion_venta, estado, threads):
    start = datetime.now()
    try:
        condicion_venta = condicion_venta.replace('-', '_')
        estado = estado.replace('-', '_')
        condicion_venta = CondicionVenta[condicion_venta]
        estado = Estado[estado]
        max_threads = int(threads)
        print('Parámetros de búsqueda')
        print('Venta : {0}'.format(condicion_venta.value))
        print('Vigente: {0}'.format(estado.value))
    except KeyError:
        print(
            'No fue posible determinar la condicion de venta o estado de medicamentos a procesar'
        )
        return 1
    except ValueError:
        print('No se proporcionó un número de hilos de ejecución válido')
        return 1

    thread = IspParser(sale_terms=condicion_venta, status=estado)
    max_pages = thread.pages_count

    pool = ThreadPool(max_threads, IspParser)
    for i in range(1, max_pages + 1):
        pool.add_task({
            'sale_terms': condicion_venta,
            'status': estado,
            'page_number': i
        })
    pool.wait_completion()
    end = datetime.now()
    print('Tiempo transcurrido: {0}'.format(end - start))
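This ThreadPool variant differs from the queue-based recipe sketched after Example #1: the constructor takes a worker class (IspParser) along with the thread count, and each task is a dict of parameters, presumably used to instantiate that worker for one page of results.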
Example #5
    def __scanfile(self, file):
        # a method of the enclosing scanner class; the listing omits the class itself
        scanner = FileScanner(file, self.outDir, self.modules, self.profile, self.pool)
        scanner.scan()

if __name__ == '__main__':

    pool = ThreadPool(MAX_THREADS)
    
    if options.inputDir:
        scanner = DirScanner(options.inputDir, options.outputDir, MODULES, options.profile, pool)
        scanner.scan()
    else:
        scanner = FileScanner(options.inputFile, options.outputDir, MODULES, options.profile, pool)
        scanner.scan()
        
    pool.wait_completion()
    
    #Search all results if required
    if options.search:
        if options.search == 'IP':
            pattern = re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b")
        else:
            pattern = re.compile(options.search)
        ofile = open(os.path.join(options.outputDir, 'SearchHits.txt'), 'w+')
        for root, dirs, files in walk(options.outputDir):
            for fname in files:
                if fname != 'SearchHits.txt':
                    ifile = open(os.path.join(root, fname), 'r')
                    line = 0
                    for text in ifile.readlines():
                        line = line + 1
                        # assumed completion: the listing truncates here, but a
                        # match is presumably written out with its file and line
                        if pattern.search(text):
                            ofile.write('%s:%d:%s' % (os.path.join(root, fname), line, text))
                    ifile.close()
        ofile.close()
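A side note on the search: \b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b also matches impossible addresses such as 999.999.999.999. A stricter, still hedged, alternative should that matter:

import re

OCTET = r"(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"  # 0-255, no leading zeros
pattern = re.compile(r"\b" + OCTET + r"(?:\." + OCTET + r"){3}\b")

assert pattern.search("host 192.168.0.1 up")
assert not pattern.search("999.999.999.999")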
Example #6
def matchAll(phase, suffix):
    global basics
    global rules
    global matches
    global failures
    global predicates
    global locators
    global noFiles
    global noFilesAffected
    global noUnits
    global noPatternConstraints
    global noPatternConstraintsOk
    global noContentConstraints
    global noContentConstraintsOk
    global noPredicateConstraints
    global noPredicateConstraintsOk
    global noFragments
    if (phase!="basics"):
        basics = tools101.getBasics()
    rules = json.load(open(const101.rulesDump, 'r'))["results"]["rules"]
    matches = list()
    failures = list()
    predicates = set()
    locators = set()
    noFiles = 0
    noUnits = 0
    noFilesAffected = 0
    noPatternConstraints = 0
    noPatternConstraintsOk = 0
    noContentConstraints = 0
    noContentConstraintsOk = 0
    noPredicateConstraints = 0
    noPredicateConstraintsOk = 0
    noFragments = 0

    pool = ThreadPool(4)

    print "Matching 101meta metadata on 101repo (phase \"" + str(phase)+ "\")."
    for root, dirs, files in os.walk(os.path.join(const101.sRoot, "contributions"), followlinks=True):
        if not root.startswith(os.path.join(const101.sRoot, ".git")+os.sep):
            for basename in files:
                noFiles += 1
                if basename not in [".gitignore"]:
                    dirname = root[len(const101.sRoot)+1:]
                    pool.add_task(handleFile, phase, dirname, basename, suffix)
                    #handleFile(phase, dirname, basename, suffix)

    sys.stdout.write('\n')

    pool.wait_completion()

    mr = dict()
    mr["matches"] = matches
    mr["failures"] = failures
    mr["rules"] = rules
    if phase=="predicates":
        mr["predicates"] = list(predicates)
    if phase=="fragments":
        mr["locators"] = list(locators)
    print str(noFiles) + " files examined."
    print str(noFilesAffected) + " files affected."
    print str(len(failures)) + " failures encountered."
    print str(noUnits) + " metadata units attached."
    print str(noContentConstraints) + " content constraints checked."
    print str(noContentConstraintsOk) + " content constraints succeeded."
    print str(noPatternConstraints) + " filename-pattern constraints checked."
    print str(noPatternConstraintsOk) + " filename-pattern constraints succeeded."
    if phase=="predicates":
        print str(noPredicateConstraints) + " predicate constraints checked."
        print str(noPredicateConstraintsOk) + " predicate constraints succeeded."
    if phase=="fragments":
        print str(len(locators)) + " fragment locators exercised."
        print str(noFragments) + " fragment descriptions checked."
    return mr
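Two details worth noting: the counters incremented by handleFile are shared by the four workers, so the totals printed at the end presume handleFile synchronizes them; and filtering on root after the fact does not stop os.walk from descending into an unwanted directory, whereas pruning dirs in place does. A hedged sketch of the pruning idiom, not the original code:

import os

top = "."  # stands in for os.path.join(const101.sRoot, "contributions")
for root, dirs, files in os.walk(top, followlinks=True):
    dirs[:] = [d for d in dirs if d != ".git"]  # prune before descending
    for basename in files:
        pass  # handle each file as matchAll() does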
Example #7
    def runHPCEvent(self):
        tolog("runHPCEvent")
        self.__job.jobState = "running"
        self.__job.setState([self.__job.jobState, 0, 0])
        self.__job.pilotErrorDiag = None
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")

        defRes = self.getDefaultResources()
        if defRes['copy_input_files'] == 'true':
            self.__copyInputFiles = True
        else:
            self.__copyInputFiles = False

        status, output, hpcJob = self.prepareHPCJob()
        if status == 0:
            tolog("HPC Job: %s " % hpcJob)
        else:
            tolog("failed to create the Tag file")
            self.failJob(0, PilotErrors.ERR_UNKNOWN, self.__job, pilotErrorDiag=output)
            return 


        self.__hpcStatus = None
        self.__hpcLog = None

        logFileName = None
        tolog("runJobHPCEvent.getPilotLogFilename=%s"% self.getPilotLogFilename())
        if self.getPilotLogFilename() != "":
            logFileName = self.getPilotLogFilename()
        hpcManager = HPCManager(globalWorkingDir=self.__job.workdir, logFileName=logFileName, poolFileCatalog=self.__poolFileCatalogTemp, inputFiles=self.__inputFilesGlobal, copyInputFiles=self.__copyInputFiles)

        self.__hpcManager = hpcManager
        self.HPCMode = "HPC_" + hpcManager.getMode(defRes)
        self.__job.setMode(self.HPCMode)
        self.__job.setHpcStatus('waitingResource')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        hpcManager.getFreeResources(defRes)
        self.__job.coreCount = hpcManager.getCoreCount()
        self.__job.setHpcStatus('gettingEvents')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        numRanges = hpcManager.getEventsNumber()
        tolog("HPC Manager needs events: %s, max_events: %s; use the smallest one." % (numRanges, defRes['max_events']))
        if numRanges > int(defRes['max_events']):
            numRanges = int(defRes['max_events'])
        eventRanges = self.getEventRanges(numRanges=numRanges)
        #tolog("Event Ranges: %s " % eventRanges)
        if len(eventRanges) == 0:
            tolog("Get no Event ranges. return")
            return
        for eventRange in eventRanges:
            self.__eventRanges[eventRange['eventRangeID']] = 'new'

        # setup stage out
        self.setupStageOutHPCEvent()

        hpcManager.initJob(hpcJob)
        hpcManager.initEventRanges(eventRanges)
        
        hpcManager.submit()
        threadpool = ThreadPool(defRes['stageout_threads'])

        old_state = None
        time_start = time.time()
        while not hpcManager.isFinished():
            state = hpcManager.poll()
            self.__job.setHpcStatus(state)
            if old_state is None or old_state != state or time.time() > (time_start + 60*10):
                old_state = state
                time_start = time.time()
                tolog("HPCManager Job stat: %s" % state)
                self.__JR.updateJobStateTest(self.__job, self.__jobSite, self.__node, mode="test")
                rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
                self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

            if state and state == 'Complete':
                break
            outputs = hpcManager.getOutputs()
            for output in outputs:
                #self.stageOutHPCEvent(output)
                threadpool.add_task(self.stageOutHPCEvent, output)

            time.sleep(30)
            self.updateHPCEventRanges()

        tolog("HPCManager Job Finished")
        self.__job.setHpcStatus('stagingOut')
        rt = RunJobUtilities.updatePilotServer(self.__job, self.getPilotServer(), self.getPilotPort())
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)

        outputs = hpcManager.getOutputs()
        for output in outputs:
            #self.stageOutHPCEvent(output)
            threadpool.add_task(self.stageOutHPCEvent, output)

        self.updateHPCEventRanges()
        threadpool.wait_completion()
        self.updateHPCEventRanges()


        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 1")
            half_stageout_threads = defRes['stageout_threads'] / 2
            if half_stageout_threads < 1:
                half_stageout_threads = 1
            threadpool = ThreadPool(half_stageout_threads)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        if len(self.__failedStageOuts) > 0:
            tolog("HPC Stage out retry 2")
            threadpool = ThreadPool(1)
            failedStageOuts = self.__failedStageOuts
            self.__failedStageOuts = []
            for failedStageOut in failedStageOuts:
                threadpool.add_task(self.stageOutHPCEvent, failedStageOut)
            threadpool.wait_completion()
            self.updateHPCEventRanges()

        self.__job.setHpcStatus('finished')
        self.__JR.updatePandaServer(self.__job, self.__jobSite, self.__node, 25443)
        self.__hpcStatus, self.__hpcLog = hpcManager.checkHPCJobLog()
        tolog("HPC job log status: %s, job log error: %s" % (self.__hpcStatus, self.__hpcLog))
Example #8
def main(argv):
	#Parse commandline arguments
	#http://www.tutorialspoint.com/python/python_command_line_arguments.htm
	update_existing = False
	try:
		opts, args = getopt.getopt(argv,"h:u:")
	except getopt.GetoptError:
		print "run_processing.py -u <True/False>"
		sys.exit(2)
	for opt, arg in opts:
		if opt == "-h":
			print "run_processing.py -u <True/False>"
		elif opt == "-u":
			if arg == "True":
				update_existing = True

	projects = [d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS) if os.path.isdir(os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))]

	if update_existing:
		print "Running in-place update."
		manager = None
	else:
		manager = consumerDatabaseThreadManager("knowledge")

	pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

	tip_hashes = {}

	all_files = []
	last_line = []

	#iterate through all projects
	for project in projects:
		if project in devknowledge.settings.PROJECT_FOLDERS:
			all_files, last_lines = Util.returnFilesLastLine(project)
			if update_existing:
				#there were issues with threads duplicating authors/files so we just mass add them at the beginning
				print "Starting initial processing of authors/files."
				Git.createFilesAuthorsInIndex(project, all_files)
				print "Finished creating authors/files."
			number_to_process = len(all_files)
			print "Number of files to process: ", number_to_process
			processed_files = 0
			for i in range(0, len(all_files)):
				file = all_files[i]
				last_line = last_lines[i]
				print "Project: ", project, " ", file, " number lines: ", last_line
				project_type = Util.returnProjectType(devknowledge.settings.VERSION_CONTROL_REPOS+project)
				if project_type == "git":
					#run Git analysis
					if project not in tip_hashes:
						#create new head hash cache
						tip_hashes[project] = Util.returnHeadHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)

					if update_existing:
						Git.storeKnowledge(manager, project, file, 1, last_line, update_existing, tip_hashes[project])
					else:
						pool.add_task(Git.storeKnowledge, manager, project, file, 1, last_line, update_existing, tip_hashes[project])
				elif project_type == "hg":
					#run Mercurial analysis
					if project not in tip_hashes:
						#create new tip hash cache
						tip_hashes[project] = Mercurial.returnTipHash(devknowledge.settings.VERSION_CONTROL_REPOS+project)

					pool.add_task(Mercurial.storeKnowledge, manager, project, file, 1, last_line, tip_hashes[project])

				processed_files += 1
				print "Percent done: %.2f %%" % float(float(processed_files)/float(number_to_process) * 100)
				Util.checkDatabaseFolderSize()
			print "Finishing up writing data to database."
			pool.wait_completion()
			if update_existing:
				#prune database of stale file and author nodes
				print "Starting prune of database."
				if project_type == "git":
					Git.pruneDatabaseStaleFiles(all_files, project)
					Git.pruneDatabaseStaleAuthors()
					Git.pruneAllOtherNodes()
				elif project_type == "hg":
					print "Not yet implemented."

	if manager:
		manager.markForFinish()

	print "Done.  Exiting."
Example #9
def compare_site_thread(old_url,
                        new_url,
                        progress_var=None,
                        step=100.0,
                        thread_pool_csv=None):
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # checking multiple sites mode
    if thread_pool_csv:
        thread_pool = thread_pool_csv
    else:
        thread_pool = ThreadPool(settings["THREADPOOL_SIZE"])
    create_path()
    ind = 0

    old_url = old_url.strip()
    new_url = new_url.strip()

    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]

    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url

    # print out the information for old and new sites
    entry_print("-----------------------------------------------------", True)
    entry_print("Old URL: " + old_url, True)
    entry_print("New URL: " + new_url, True)
    entry_print("-----------------------------------------------------", True)

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # check if the new site needs login
    new_test = get_soup(new_url)
    if new_test:
        title = new_test.find("title")
        if title and title.get_text().strip() == "Login":
            entry_print(
                "New site needs login. Please use login mode to check this site!\n",
                True)
            return -1

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # get the subpages of old and new sites
    try:
        sites = get_sites(old_url)
    except AttributeError:
        entry_print(
            "Can't find the site map from " + old_url +
            ". Please check if the url is valid!", True)
        thread_pool.destroy()
        return
    old_blog = get_blog_site(old_url)
    new_blog = get_blog_site(new_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        thread_pool.destroy()
        return

    blog_exists = False
    if old_blog and new_blog:
        blog_exists = True

    # if urls for subpages are not found
    if sites is None:
        record_error(new_url, "sites")
        if progress_var:
            progress_var.set(progress_var.get() + step)
        return False

    # if blog page is not found
    if old_blog is not None and new_blog is None:
        record_error(new_url, "blog")
    elif old_blog is None and new_blog is not None:
        record_error(old_url, "blog")

    setup_step = step * 0.02
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # print out site information
    entry_print("Site Information: ", True)

    # calculate the step for each page
    step *= 0.96
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
        entry_print("Old Blog: " + old_blog, True)
        entry_print("New Blog: " + new_blog, True)
    else:
        page_step = step / (len(sites) + 1)

    entry_print("Number of non-blog pages: " + str(len(sites)), True)

    # check the homepage
    thread_pool.add_task(compare_homepage,
                         old_url=old_url,
                         new_url=new_url,
                         progress_var=progress_var,
                         step=page_step)

    # check all the sites in sitemap
    for site in sites:
        ind += 1
        if site.startswith("/home") or site.startswith("/main"):
            continue

        old_link = old_url + site
        new_link = new_url + site

        thread_pool.add_task(compare_page,
                             old_url=old_link,
                             new_url=new_link,
                             progress_var=progress_var,
                             step=page_step)

    # check all the blog pages
    if blog_exists:
        old_blog_soup = get_soup(old_blog)
        new_blog_soup = get_soup(new_blog)
        compare_blog(old_blog_soup,
                     new_blog_soup,
                     old_blog,
                     new_blog,
                     progress_var=progress_var,
                     step=step / 2)

    # single site mode
    if not thread_pool_csv:
        thread_pool.wait_completion()
        thread_pool.destroy()

    entry_print("-----------------------------------------------------\n")

    return True
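The URL clean-up at the top of this function (strip whitespace, drop the trailing slash, prepend a scheme) is self-contained enough to isolate into a helper; a hedged consolidation:

def normalize_url(url):
    # rstrip('/') also tolerates empty strings and repeated trailing slashes
    url = url.strip().rstrip('/')
    if url and not url.startswith("http"):
        url = "http://" + url
    return url

assert normalize_url(" example.com/ ") == "http://example.com"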
Example #10
def main(argv):
    #Parse commandline arguments
    #http://www.tutorialspoint.com/python/python_command_line_arguments.htm
    update_existing = False
    try:
        opts, args = getopt.getopt(argv, "h:u:")
    except getopt.GetoptError:
        print "run_dependencies.py -u <True/False>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print "run_dependencies.py -u <True/False>"
        elif opt == "-u":
            if arg == "True":
                update_existing = True

    projects = [
        d for d in os.listdir(devknowledge.settings.VERSION_CONTROL_REPOS)
        if os.path.isdir(
            os.path.join(devknowledge.settings.VERSION_CONTROL_REPOS, d))
    ]

    if update_existing:
        print "Running in-place update."
        manager = None
    else:
        manager = consumerDatabaseThreadManager("dependencies")

    pool = ThreadPool(devknowledge.settings.CONCURRENT_THREADS)

    #iterate through all projects
    for project in projects:
        if project in devknowledge.settings.PROJECT_FOLDERS:
            all_files, last_lines = Util.returnFilesLastLine(project)
            c_files = Util.returnCandCPlusPlusFiles(all_files, project)
            number_to_process = len(c_files)
            print "Number of files to process: ", number_to_process
            processed_files = 0
            for file in c_files:
                last_line = last_lines[processed_files]
                #print "Project: ", project, " ", file, " number lines: ", last_line

                if update_existing:
                    FileDependencies.parseFileDependencies(
                        manager, project, file, c_files, update_existing)
                else:
                    pool.add_task(FileDependencies.parseFileDependencies,
                                  manager, project, file, c_files,
                                  update_existing)

                processed_files += 1

                print "Percent done: %.2f %%" % float(
                    float(processed_files) / float(number_to_process) * 100)
                Util.checkDatabaseFolderSize()
            print "Finishing up writing data to database."
            pool.wait_completion()

    if not update_existing:
        manager.markForFinish()
    print "Done.  Exiting."
Example #12
class DroidStager(threading.Thread):
    def __init__(self,
                 globalWorkingDir,
                 localWorkingDir,
                 outputs=None,
                 job=None,
                 esJobManager=None,
                 outputDir=None,
                 rank=None,
                 logger=None):
        threading.Thread.__init__(self)
        self.__globalWorkingDir = globalWorkingDir
        self.__localWorkingDir = localWorkingDir
        self.__currentDir = None
        self.__rank = rank
        if logger and False:  # "and False" disables this branch -- likely a debugging leftover
            self.__tmpLog = logger
        else:
            curdir = _abspath(self.__localWorkingDir)
            wkdirname = "rank_%s" % str(self.__rank)
            wkdir = _abspath(_join(curdir, wkdirname))
            self.__tmpLog = Logger.Logger(
                filename=os.path.join(wkdir, 'Droid.log'))
        self.__job = job
        self.__esJobManager = esJobManager
        self.__stop = threading.Event()
        self.__isFinished = False
        self.__tmpLog.info("Rank %s: Global working dir: %s" %
                           (self.__rank, self.__globalWorkingDir))
        os.environ['PilotHomeDir'] = os.path.dirname(self.__globalWorkingDir)

        self.__jobId = None
        self.__copyOutputToGlobal = False
        self.__outputDir = outputDir

        self.__hostname = socket.getfqdn()

        self.__outputs = outputs
        self.__threadpool = None
        self.setup(job)

    def setup(self, job):
        try:
            self.__jobId = job.get("JobId", None)
            self.__yodaToOS = job.get('yodaToOS', False)
            self.__yodaToZip = job.get('yodaToZip', False)
            self.__zipFileName = job.get('zipFileName', None)
            self.__zipEventRangesName = job.get('zipEventRangesName', None)
            self.__tmpLog.debug("Rank %s: zip file %s" %
                                (self.__rank, self.__zipFileName))
            self.__tmpLog.debug("Rank %s: zip event range file %s" %
                                (self.__rank, self.__zipEventRangesName))
            if self.__zipFileName is None or self.__zipEventRangesName is None:
                self.__tmpLog.debug(
                    "Rank %s: either zipFileName(%s) is None or zipEventRanagesName(%s) is None, will not use zip output"
                    % (self.__rank, self.__zipFileName,
                       self.__zipEventRangesName))
                self.__yodaToZip = False
            self.__copyOutputToGlobal = job.get('copyOutputToGlobal', False)

            if self.__yodaToOS:
                setup = job.get('setup', None)
                self.__esPath = job.get('esPath', None)
                self.__os_bucket_id = job.get('os_bucket_id', None)
                self.__report = getInitialTracingReport(
                    userid='Yoda',
                    sitename='Yoda',
                    dsname=None,
                    eventType="objectstore",
                    analysisJob=False,
                    jobId=None,
                    jobDefId=None,
                    dn='Yoda')
                self.__siteMover = objectstoreSiteMover(setup,
                                                        useTimerCommand=False)
                self.__cores = int(job.get('ATHENA_PROC_NUMBER', 1))

                self.__tmpLog.debug("Rank %s: start threadpool" %
                                    (self.__rank))
                # one stage-out thread per 8 cores; integer division here can
                # yield a pool with no workers when fewer than 8 cores are set
                self.__threadpool = ThreadPool(self.__cores / 8)

        except:
            self.__tmpLog.error("Failed to setup Droid stager: %s" %
                                str(traceback.format_exc()))

    def copyOutput(self, output, outputs):
        if self.__outputDir:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__outputDir, base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        elif self.__copyOutputToGlobal:
            for filename in outputs:
                #filename = output.split(",")[0]
                base_filename = os.path.basename(filename)
                new_file_name = os.path.join(self.__globalWorkingDir,
                                             base_filename)
                is_copied = False
                try:
                    os.rename(filename, new_file_name)
                    is_copied = True
                except:
                    self.__tmpLog.debug(
                        "Rank %s: failed to move output %s to %s, %s" %
                        (self.__rank, filename, new_file_name,
                         str(traceback.format_exc())))
                    is_copied = False
                if not is_copied:
                    shutil.copy(filename, new_file_name)
                    os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output
        else:
            if self.__localWorkingDir == self.__globalWorkingDir:
                return 0, output

            for filename in outputs:
                #filename = output.split(",")[0]
                new_file_name = filename.replace(self.__localWorkingDir,
                                                 self.__globalWorkingDir)
                dirname = os.path.dirname(new_file_name)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                shutil.copy(filename, new_file_name)
                os.remove(filename)
                output = output.replace(filename, new_file_name)
            return 0, output

    def stageOutToOS(self, outputs):
        ret_status = 0
        ret_outputs = []
        try:
            for filename in outputs:
                ret_status, pilotErrorDiag, surl, size, checksum, arch_type = self.__siteMover.put_data(
                    filename,
                    self.__esPath,
                    lfn=os.path.basename(filename),
                    report=self.__report,
                    token=None,
                    experiment='ATLAS')
                if ret_status == 0:
                    os.remove(filename)
                    ret_outputs.append(surl)
                else:
                    self.__tmpLog.debug("Failed to stageout %s: %s %s" %
                                        (filename, ret_status, pilotErrorDiag))
                    return ret_status, pilotErrorDiag
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when staging out: %s" %
                (self.__rank, traceback.format_exc()))
            ret_status = -1
        return ret_status, ret_outputs

    def zipOutputs(self, eventRangeID, eventStatus, outputs):
        try:
            for filename in outputs:
                command = "tar -rf " + self.__zipFileName + " --directory=%s %s" % (
                    os.path.dirname(filename), os.path.basename(filename))
                status, ret = commands.getstatusoutput(command)
                if status:
                    self.__tmpLog.debug("Failed to zip %s: %s, %s" %
                                        (filename, status, ret))
                    return status, ret
                else:
                    os.remove(filename)
        except:
            self.__tmpLog.warning(
                "Rank %s: Droid throws exception when zipping out: %s" %
                (self.__rank, traceback.format_exc()))
            return -1, "Failed to zip outputs"
        else:
            handler = open(self.__zipEventRangesName, "a")
            handler.write("%s %s %s\n" % (eventRangeID, eventStatus, outputs))
            handler.close()
        return 0, outputs

    def stageOut(self, eventRangeID, eventStatus, output, retries=0):
        if eventStatus.startswith("ERR"):
            request = {
                "eventRangeID": eventRangeID,
                'eventStatus': eventStatus,
                "output": output
            }
        else:
            outputs = output.split(",")[:-3]
            if self.__yodaToZip:
                self.__tmpLog.debug("Rank %s: start to zip outputs: %s" %
                                    (self.__rank, outputs))
                retStatus, retOutput = self.zipOutputs(eventRangeID,
                                                       eventStatus, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to zip outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'zipped',
                        "output": retOutput
                    }
            elif self.__yodaToOS:
                self.__tmpLog.debug(
                    "Rank %s: start to stage out outputs to objectstore: %s" %
                    (self.__rank, outputs))
                retStatus, retOutput = self.stageOutToOS(outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to stagout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    if retries < 1:
                        self.stageOut(eventRangeID,
                                      eventStatus,
                                      output,
                                      retries=retries + 1)
                        request = None
                    else:
                        request = {
                            "jobId": self.__jobId,
                            "eventRangeID": eventRangeID,
                            'eventStatus': eventStatus,
                            "output": output
                        }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to stageout outputs %s to objectstore: %s"
                        % (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': 'stagedOut',
                        "output": retOutput,
                        'objstoreID': self.__os_bucket_id
                    }
            else:
                self.__tmpLog.debug("Rank %s: start to copy outputs: %s" %
                                    (self.__rank, outputs))
                retStatus, retOutput = self.copyOutput(output, outputs)
                if retStatus != 0:
                    self.__tmpLog.error(
                        "Rank %s: failed to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": output
                    }
                else:
                    self.__tmpLog.info(
                        "Rank %s: finished to copy outputs %s: %s" %
                        (self.__rank, outputs, retOutput))
                    request = {
                        "jobId": self.__jobId,
                        "eventRangeID": eventRangeID,
                        'eventStatus': eventStatus,
                        "output": retOutput
                    }
        if request:
            self.__outputs.put(request)

    def stop(self):
        self.__stop.set()

    def isFinished(self):
        return self.__isFinished

    def run(self):
        while True:
            try:
                outputs = self.__esJobManager.getOutputs()
                if outputs:
                    self.__tmpLog.debug("Rank %s: getOutputs: %s" %
                                        (self.__rank, outputs))
                    for outputMsg in outputs:
                        try:
                            eventRangeID, eventStatus, output = outputMsg
                            if self.__threadpool:
                                self.__tmpLog.debug(
                                    "Rank %s: add event output to threadpool: %s"
                                    % (self.__rank, outputMsg))
                                self.__threadpool.add_task(self.stageOut,
                                                           eventRangeID,
                                                           eventStatus,
                                                           output,
                                                           retries=0)
                            else:
                                self.stageOut(eventRangeID,
                                              eventStatus,
                                              output,
                                              retries=0)
                        except:
                            self.__tmpLog.warning(
                                "Rank %s: error message: %s" %
                                (self.__rank, traceback.format_exc()))
                            continue
            except:
                self.__tmpLog.error("Rank %s: Stager Thread failed: %s" %
                                    (self.__rank, traceback.format_exc()))
            if self.__stop.isSet():
                if self.__threadpool:
                    self.__tmpLog.warning(
                        "Rank %s: wait threadpool to finish" % (self.__rank))
                    self.__threadpool.wait_completion()
                    self.__tmpLog.warning("Rank %s: threadpool finished" %
                                          (self.__rank))
                break
            time.sleep(1)
        self.__isFinished = True
Example #13
    def producer(pool, user, delay, num_of_jobs=10):
        # reconstructed opening: the original listing begins mid-loop; the
        # signature matches args=(pool, user, delay,) below, and num_of_jobs
        # is a hypothetical stand-in for the missing loop bound
        real_time_for_jobs = 0
        task_num = 0
        while task_num < num_of_jobs:
            d = randrange(1, 10)  # sleep up to 10 sec
            try:
                logger.info('Adding task: sleep for %d secs (task %d)', d, task_num)
                pool.add_task(user, wait_delay, d, task_num)
                real_time_for_jobs += d
            except Exception as e:
                logging.warning(e)
            task_num += 1
            time.sleep(delay)
        logger.info('user %s submitted jobs for %d secs', user, real_time_for_jobs)


    # 1) Init a Thread pool with the desired number of threads and number of users
    pool = ThreadPool(num_of_process, num_of_users, sched_policy, queue_policy)
    producers = []

    for user in range(num_of_users):
        base_time = 20.0  # the slowest user
        delay = base_time / (user + 1)
        p = Process(target=producer, args=(pool, user, delay,))
        producers.append(p)
        p.start()

    # 3) Wait for completion
    [p.join() for p in producers]

    pool.kill()
    pool.wait_completion()

    pool.scheduler.print_statistics()
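This last fragment exercises yet another pool variant: producer processes feed tasks tagged with a user id into a shared pool built with a scheduling and queueing policy, kill() presumably stops the workers once the producers have joined, and pool.scheduler.print_statistics() reports the per-user scheduling metrics.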