Exemplo n.º 1
0
        sj_ec2.submit_job(jd_ec2)
        jobs_ec2.append(sj_ec2)
        number_started_jobs = number_started_jobs + 1

    for i in range(0, NUMBER_JOBS_GRID):
        print "Start job no.: " +str(number_started_jobs + 1) + " on Grid"
        sj_tg = bigjob.subjob(advert_host)
        jd.output = "stdout_tg.txt."+str(number_started_jobs+1)
        jd.error = "stderr_tg.txt."+str(number_started_jobs+1)
        sj_tg.submit_job(bj_tg.pilot_url, jd)
        jobs_tg.append(sj_tg)
        number_started_jobs = number_started_jobs + 1

    for i in range(0, NUMBER_JOBS_CONDOR):
        print "Start job no.: " +str(number_started_jobs + 1) + " on Condor"
        sj_condor = bigjob_condor.subjob(bigjob=bj_condor)
        jd_condor.output = "stdout_condor.txt."+str(number_started_jobs+1)
        jd_condor.error = "stderr_condor.txt."+str(number_started_jobs+1)
        sj_condor.submit_job(jd_condor)
        jobs_condor.append(sj_condor)
        number_started_jobs = number_started_jobs + 1

    """ Monitor Progress """
    while True:
        try:
            print "*******************************************************************************************************"
            print "Pilot Job/BigJob URL: " + bj_ec2.pilot_url + " State: " + str(bj_ec2.get_state_detail()) + " Time since launch: " + str(time.time()-start)
            print "Pilot Job/BigJob URL: " + bj_nimbus.pilot_url + " State: " + str(bj_nimbus.get_state_detail()) + " Time since launch: " + str(time.time()-start)
            print "Pilot Job/BigJob URL: " + bj_tg.pilot_url + " State: " + str(bj_tg.get_state_detail()) + " Time since launch: " + str(time.time()-start)
            print "Pilot Job/BigJob URL: " + bj_condor.pilot_url + " State: " + str(bj_condor.get_state_detail()) + " Time since launch: " + str(time.time()-start)
Exemplo n.º 2
0
	# Remaining fields of the job description `jd`, which is passed unchanged
	# to every sj.submit_job(jd) call further down.
	jd.arguments = ["NPT.conf"]
	# NOTE(review): hard-coded, user-specific absolute path.
	jd.working_directory = "/work/lukas/saga/bigjob/data"
	# $(CLUSTER), $(PROCESS) and $(NODE) look like Condor submit-file macros
	# meant to give each process its own file -- TODO confirm they are
	# expanded downstream and not used as literal file names.
	jd.output = "namd.$(CLUSTER).$(PROCESS).$(NODE).out"
	jd.error = "namd.$(CLUSTER).$(PROCESS).$(NODE).err"

	# Write the extra attribute lines to CONDOR_BIN/condor_attr.
	# try/finally ensures the handle is closed (and buffered data flushed)
	# even if one of the writes fails; it is used instead of `with` so the
	# script also runs on Python 2 releases without the with-statement.
	attr = open(CONDOR_BIN + "/condor_attr", "w")
	try:
		attr.write("universe = parallel\n")
		attr.write("machine_count = 2\n")			# number of nodes (not cores)
		attr.write("+WantParallelSchedulingGroups = True\n")	# to avoid running on nodes from different clusters
	finally:
		attr.close()

	jobs = []
	for i in range (0, NUMBER_JOBS):
		print "Start job no.: " + str(i)
		print time.ctime()
		sj = bigjob_condor.subjob(bigjob=bj)
		sj.submit_job(jd)
		jobs.append(sj)

	# busy wait for completion
	sj = jobs.pop()
	while 1:
		try:
			state = str(sj.get_state())
			print "state: " + state
			if state=="Failed" or state=="Done" or state=="Canceled" or state=="Suspended":
				if len(jobs) > 0:
					sj = jobs.pop()
					continue
				else:
					break