## NOTE(review): the next line is a whitespace-collapsed fragment of a flat
## script. Because it begins with '#', Python reads the whole physical line
## as one comment, so none of the statements embedded in it (the issue_echo
## calls, the counterApplications.add_to_counter_log attempt, the random
## 0.1-15 s sleep, the my_queue check) execute as written.
## It is deliberately left collapsed: it ends with the header
## "if check_room == 'Failed':" and the body of that 'if' is not visible
## here, so the original line breaks cannot be restored without it.
# lamenvfile.write(lamSuffix) # lamenvfile.flush() # lamenvfile.close() # return lamSuffix ## Starting. First, the very first run. issue_echo('starting', tmpDir) # NCPU, MAX_NUM_PROCS = set_defaults_lam(tmpDir) try: counterApplications.add_to_counter_log('ADaCGH2', tmpDir, socket.gethostname()) except: None issue_echo('at 2', tmpDir) # lamSuffix = generate_lam_suffix(tmpDir) issue_echo('at 3', tmpDir) time.sleep(random.uniform(0.1, 15)) ## Break ties if starting at identical times check_room = my_queue(MAX_adacgh, MAX_DURATION_TRY = MAX_DURATION_TRY_ADaCGH) issue_echo('after check_room', tmpDir) if check_room == 'Failed':
## Launcher fragment: records the run in the counter log, waits a short
## random interval, then starts the retry loop (one iteration per allowed
## try, up to int(numtries)).
## NOTE(review): this text was collapsed onto a single line; the line
## breaks below are reconstructed from the statement boundaries.

## The following does not work. We would need to capture the output
## from ps, and then get substring with -sessionsuffix and the number = lamSuffix.
## But killing lam kills all slaves and the main process
## lamdpid = os.popen('ps --ppid ' + str(lampid) + ' -o "%p" --no-headers').readline()
## time.sleep(0.5)

## general cleaning
# buried = os.system("/http/mpi.log/buryThem2.py")
# killedlamandr = os.system('/http/mpi.log/killOldLamAllMachines.py')
# cleaned_dirs = os.system('/http/mpi.log/delete_old_dirs.py')

## Best-effort bookkeeping: a failure while writing the counter log must not
## stop the run. Only Exception is caught, so SystemExit and
## KeyboardInterrupt still propagate (the previous bare 'except:' swallowed
## those as well).
try:
    counterApplications.add_to_counter_log(application, tmpDir, socket.gethostname())
except Exception:
    pass

startedOK = False
time.sleep(random.uniform(0, 1)) ## to prevent truly simultaneous from crashing MPI

for i in range(int(numtries)):
    ## Drop a marker file per attempt so the number of tries can be seen in
    ## tmpDir.
    ## NOTE(review): the shell command is built by string concatenation, so
    ## it breaks if tmpDir contains spaces or shell metacharacters.
    os.system('touch ' + tmpDir + '/numtries_' + str(i)) ## debug
    # lamSuffix = str(int(time.time())) + str(os.getpid()) + \
    #             str(random.randint(10, 999999))
    # lamenvfile = open(tmpDir + '/lamSuffix', mode = 'w')
    # lamenvfile.write(lamSuffix)
    # lamenvfile.flush()
    # lamenvfile.close()
    # lamenv = os.putenv('LAM_MPI_SESSION_SUFFIX', lamSuffix)
###################################################################
###################################################################
## Launcher fragment: logs the pid, records the run in the counter log,
## then either runs the limma R script directly (test_type in limma_tests)
## or enters the MPI branch.
## NOTE(review): this text was collapsed onto a single line; the line
## breaks below are reconstructed from the statement boundaries.

issue_echo('pomelo_run2.py pid = '+ str(os.getpid()), tmpDir)

## killedlamandr = os.system('/http/mpi.log/killOldLam.py')
# os.system("cd " + tmpDir + "; touch about_to_call_buryPom")
# os.system("cd " + tmpDir + "; /http/mpi.log/buryPom.py; touch just_called_buryPom")

## Best-effort bookkeeping: a failure while writing the counter log must not
## stop the run. Only Exception is caught, so SystemExit and
## KeyboardInterrupt still propagate (the previous bare 'except:' swallowed
## those as well).
try:
    counterApplications.add_to_counter_log('PomeloII-' + test_type, tmpDir, socket.gethostname())
except Exception:
    pass

count_mpi_crash = 0

if test_type in limma_tests:
    ## limma tests: run R in batch mode on limma_functions.R from inside
    ## tmpDir (the 'cd' is part of the shell command, so the working
    ## directory of this Python process is unchanged).
    R_launch = R_pomelo_bin + " CMD BATCH --no-restore --no-readline --no-save -q limma_functions.R"
    fullPomelocommand = "cd " + tmpDir + "; " + R_launch
    issue_echo(' about to do fullPomelocommand', tmpDir)
    os.system(fullPomelocommand)
    issue_echo(' just did fullPomelocommand', tmpDir)
else: ## we use MPI
    startedOK = False
    issue_echo('starting else loop', tmpDir)