def done_jobs(jobs):
    '''Retrieve the results of the finished jobs from PiCloud.

    jobs: iterable of PiCloud job ids.
    Returns a list of results for the jobs whose status is 'done'.
    '''
    # BUG FIX: the original called list(jobs) twice; if `jobs` is a
    # single-pass iterator the second call yields an empty list, so no
    # results were ever fetched.  Materialize it exactly once.
    job_list = list(jobs)
    statuses = cloud.status(job_list)
    finished = [jid for jid, state in zip(job_list, statuses) if state == 'done']
    return list(cloud.iresult(finished))
def check_job_itemdetail_status(self): selleritemdetailsjobs_completed = 0 newsellerlist=[] filenamelist=[] for seller in self.successsellers: if seller.get('jobcompletecheck', True) == False: newsellerlist.append(seller) else: cloud.cloud.cloudLog.critical("job failed for seller "+str(seller)) if seller.get('itemdetailjobid'):cloud.kill(seller['itemdetailjobid']) if seller.get('itemdetailjoblist'):cloud.kill(seller['itemdetailjoblist']) while selleritemdetailsjobs_completed != len(newsellerlist): try: for seller in newsellerlist: if seller.get('jobcompletecheck', True) == False: sellerjobfailedcount = 0 sellercompletedjobs = 0 for jobid in seller['itemdetailjoblist']: if cloud.status(jobid) in ('error', 'stalled', 'killed'): sellerjobfailedcount += 1 sellercompletedjobs += 1 if cloud.status(jobid) in ('done'): sellercompletedjobs += 1 if sellerjobfailedcount > SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT: update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) cloud.kill(seller['itemdetailjoblist']) selleritemdetailsjobs_completed += 1 seller['jobcompletecheck'] = True logger.exception(seller['sellerid'] + " jobs kill after two job failed") elif sellercompletedjobs >= len(seller['itemdetailjoblist']): selleritemdetailsjobs_completed += 1 seller['jobcompletecheck'] = True cloud.delete(seller['itemdetailjobid']) self.download_dumps(seller['filenamelist']) filenamelist+=seller['filenamelist'] update_sellers_status([seller], dict(phase="INDEXING", progress="ENDED")) print "Job detail wait loop" time.sleep(3) except Exception, e: print e
def status(jids): naccounts = len(jids) status = [] # Retrieves the results for i in xrange(naccounts): api_key = api_keys[i] api_secretkey = api_secretkeys[i] cloud.setkey(api_key=api_key, api_secretkey=api_secretkey) print "Retrieving status for account %d..." % (i + 1) status.extend(cloud.status(jids[i])) return status
def track(batch, remote, debug): try: if (not remote): logging.info("cannot track locally") return jobs = cache.get("batch/%s/jobs" % batch, remote) k_ = jobs['train'] + jobs['validate'] + jobs['test'] + [jobs['report']] status_ = cloud.status(k_) count = co.Counter(status_) print count except: print "status failed" pass
def run_ip(): #Figure out how many jobs I want to create and how many requests per job job_count = int(sys.argv[1]) job_rows = range(0, job_count) #Now actually map them to run in the cloud #The "s1" type gives unique IP addresses. Eek print "Creating job map for {0} jobs.".format(len(job_rows)) jids = cloud.map(download_ip, job_rows, _type="s1") print "Waiting for jobs to complete." #The possible statuses and the statuses we are waiting for possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"] pending_job_statuses = Set(["waiting", "queued", "processing"]) #Keep looping until no job statuses are in the pending_job_statuses statuses = [] while True: statuses = cloud.status(jids) tally = Counter() for status in statuses: tally[status] += 1 print "Status of jobs: " + str(tally) #If none of the statuses are in pending_job_statuses, we are done! if len(pending_job_statuses.intersection(Set(statuses))) == 0: break #Wait for 5 seconds between checks sleep(5) #Now loop through the jobs and retrieve the results ip_counter = Counter() results = cloud.result(jids) for result in results: ip_counter[result] += 1 print "IP Addresses: " + str(ip_counter)
def cloud_status(jids):
    """Return a Counter mapping each PiCloud job status to its frequency."""
    # Counter accepts the status iterable directly -- the manual tally loop
    # is unnecessary, and this matches the sibling cloud_status definition.
    return Counter(cloud.status(jids))
def cloud_status(jids):
    """Tally how many of the given PiCloud jobs are in each status."""
    status_counts = Counter(cloud.status(jids))
    return status_counts
def test_exception4():
    '''cloud.status on a job id that does not exist should raise CloudException.'''
    nonexistent_jid = 100000
    cloud.status(nonexistent_jid)
def test_exception3():
    '''cloud.status with an invalid (string) argument should raise TypeError.'''
    bad_argument = "asdf"
    jid = cloud.status(bad_argument)
def run(): #Import the data we need #Figure out how many jobs I want to create and how many requests per job job_count = int(sys.argv[1]) req_per_row = int(sys.argv[2]) #Probably an easier way to do this... #For each job, add the requests to that array and append it to job_rows print "Generating data for {0} jobs".format(job_count) job_rows = [] for i in xrange(0, job_count): job = [] while len(job) < req_per_row and len(needed_rows) > 0: job.append(needed_rows.pop()) job_rows.append(job) if len(needed_rows) == 0: break #Print status of jobs print "Created {0} jobs with {1} distances each, so {2} distances to be processed.".format(len(job_rows),req_per_row,sum([len(job) for job in job_rows])) #Now actually map them to run in the cloud #The "s1" type gives unique IP addresses per job. Eek print "Creating job map" jids = cloud.map(download, job_rows, _type="s1") print "Waiting for jobs to complete." #The possible statuses and the statuses we are waiting for possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"] pending_job_statuses = Set(["waiting", "queued", "processing"]) #Keep looping until no job statuses are in the pending_job_statuses statuses = [] while True: statuses = cloud.status(jids) tally = Counter() for status in statuses: tally[status] += 1 print "Status of jobs: " + str(tally) #If none of the statuses are in pending_job_statuses, we are done! 
if len(pending_job_statuses.intersection(Set(statuses))) == 0: break #Wait for 5 seconds between checks sleep(5) #Now loop through the jobs and retrieve the results saved = 0 for index in xrange(0, len(statuses)): print "Working on job {0} of {1}".format(index+1, len(statuses)) jid = jids[index] status = statuses[index] #If it's not "done", then there must have been an error if status != "done": print "Status of jid {0} = {1}.".format(jid, status) continue results = cloud.result(jid) print "There are {0} results.".format(len(results)) for result in results: #Make sure we aren't over the limits or nothing went wrong if result["status"] != "OK": print('result["status"] == ' + result["status"]) continue elem = result["rows"][0]["elements"][0] if elem["status"] != "OK": print('elem["status"] == ' + elem["status"]) continue dist = elem["distance"]["value"] dur = elem["duration"]["value"] a = result["a"] b = result["b"] walk = result["walk"] if a not in distances: distances[a] = {} if b not in distances[a]: distances[a][b] = {} distances[a][b][walk] = {"walk":walk,"distance":dist,"duration":dur,"a":a,"b":b} saved += 1 try: save_distances(distances) except: print "Couldn't save distances." print "Saved {0:,d} distance with {1:,d} remaining".format(saved, len_needed_rows-saved)
# look at how long it takes to run locally %time segment_stats(valid_segments[0], None) # <codecell> # here's how to run it on PiCloud # Prerequisite: http://docs.picloud.com/primer.html <--- READ THIS AND STUDY TO REFRESH YOUR MEMORY import cloud jid = cloud.call(segment_stats, '1346823845675', None, _env='/rdhyee/Working_with_Open_Data') # <codecell> # pull up status -- refresh until done cloud.status(jid) # <codecell> # this will block until job is done or errors out cloud.join(jid) # <codecell> # get your result cloud.result(jid) # <codecell> # get some basic info
seller['itemdetailjoblist'] = jobtempids seller['jobcompletecheck'] = False seller['filenamelist'] = jobtempfilenames except Exception, e: update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) #cloud.join(itemdetailjobids,ignore_errors=False) successfullsellerfilenamelist=self.check_job_itemdetail_status() #self.delete_job_related_data_in_picloud(jobids, mainjobids, itemdetailjobids) except Exception,e: print e finally: for seller in self.successsellers: if cloud.status(seller['jobid']) not in ('done','error', 'stalled', 'killed'): cloud.kill(seller['jobid']) update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) logger.critical("Job of "+str(seller['sellerid'])+" killed although running as there were exception from client rest call") removelockfile() return successfullsellerfilenamelist def delete_job_related_data_in_picloud(self, sellerjobids, queuejobdis, itemdetailjobids): if itemdetailjobids: cloud.delete(sellerjobids) if queuejobdis: cloud.delete(queuejobdis) if itemdetailjobids: cloud.delete(itemdetailjobids) def check_job_itemdetail_status(self):
import exposure #exposure.exposure() import cloud from pylab import show from Qfunction import qfuncimage from numpy import imag, real N = 100 # number of times to run simulation args = 1024 arglist = [1024]*N #each call uses the same argument in our case jids = cloud.map(exposure.exposure, arglist) all(item == 'done' for item in cloud.status(jids)) dataout = cloud.result(jids) img = qfuncimage(real(dataout),imag(dataout),20) show(img)
def run():
    """Fetch per-song view/like counts via PiCloud jobs and save them.

    Reads the job sizing from sys.argv[1] (job count) and sys.argv[2]
    (songs per job).
    """
    #Import the data we need
    import_data()
    global rows
    #This was to convert the csvfile with commas as delimiters to "|" as delimiters
    """
    output = open("output.csv", "wb")
    rowwriter = csv.writer(output, delimiter='|')
    for row in rows:
        if len(row) != 5:
            print "Skipping row with len " + str(len(row))
            continue
        a = row[0]+","+row[1]
        b = row[2]+","+row[3]
        walk = row[4]
        rowwriter.writerow([a, b, walk])
    output.close()
    print "Done"
    exit()
    """
    distances = import_distances()
    #print "rows:"
    #print rows
    #Find the distances from the csv file not in the json file
    print "Finding distances which need to be downloaded"
    needed_rows = []
    ig = 0
    #Find which distances still need to be downloaded
    for row in rows:
        #print row
        song_title = row[0]
        artist_name = row[1]
        #walk = row[2]
        #If it's in distances, then it's been processed and we don't need it
        if song_title in distances and artist_name in distances[song_title]: # and walk in distances[a][b]:
            continue
        needed_rows.append(row)
    #Print the results
    print "There are a total of {0:,d} songs.".format(len(rows))
    print "There are {0:,d} downloaded songs.".format(sum([len(d) for d in distances]))
    print "Ignored {0:,d} songs since len != 3.".format(ig)
    print "There are {0:,d} songs needing to be processed.".format(len(needed_rows))
    len_needed_rows = len(needed_rows)
    if len_needed_rows <= 0:
        print "Done"
        exit()
    #Figure out how many jobs I want to create and how many requests per job
    job_count = int(sys.argv[1])
    req_per_row = int(sys.argv[2])
    #Probably an easier way to do this...
    #For each job, add the requests to that array and append it to job_rows
    print "Generating data for {0} jobs".format(job_count)
    job_rows = []
    for i in xrange(0, job_count):
        job = []
        # Drain needed_rows into batches of req_per_row songs per job.
        while len(job) < req_per_row and len(needed_rows) > 0:
            job.append(needed_rows.pop())
        job_rows.append(job)
        if len(needed_rows) == 0:
            break
    #Print status of jobs
    print "Created {0} jobs with {1} songs each, so {2} songs to be processed.".format(len(job_rows),req_per_row,sum([len(job) for job in job_rows]))
    #Now actually map them to run in the cloud
    #The "s1" type gives unique IP addresses per job. Eek
    print "Creating job map"
    #print job_rows
    jids = cloud.map(download, job_rows, _type="s1")
    print "Waiting for jobs to complete."
    #The possible statuses and the statuses we are waiting for
    possible_job_statutes = ["waiting", "queued", "processing", "done", "error", "killed", "stalled"]
    pending_job_statuses = Set(["waiting", "queued", "processing"])
    #Keep looping until no job statuses are in the pending_job_statuses
    statuses = []
    while True:
        statuses = cloud.status(jids)
        tally = Counter()
        for status in statuses:
            tally[status] += 1
        print "Status of jobs: " + str(tally)
        #If none of the statuses are in pending_job_statuses, we are done!
        if len(pending_job_statuses.intersection(Set(statuses))) == 0:
            break
        #Wait for 5 seconds between checks
        sleep(5)
    #Now loop through the jobs and retrieve the results
    saved = 0
    for index in xrange(0, len(statuses)):
        print "Working on job {0} of {1}".format(index+1, len(statuses))
        jid = jids[index]
        status = statuses[index]
        #If it's not "done", then there must have been an error
        if status != "done":
            print "Status of jid {0} = {1}.".format(jid, status)
            continue
        results = cloud.result(jid)
        print "There are {0} results.".format(len(results))
        for result in results:
            #Make sure we aren't over the limits or nothing went wrong
            #if result["status"] != "OK":
            #    print('result["status"] == ' + result["status"])
            #    continue
            #elem = result["rows"][0]["elements"][0]
            #if elem["status"] != "OK":
            #    print('elem["status"] == ' + elem["status"])
            #    continue
            #dist = elem["distance"]["value"]
            #dur = elem["duration"]["value"]
            #a = result["artist_name"]
            #b = result["b"]
            #walk = result["walk"]
            #if a not in distances:
            #    distances[a] = {}
            #if b not in distances[a]:
            #    distances[a][b] = {}
            #distances[a][b][walk] = {"walk":walk,"distance":dist,"duration":dur,"a":a,"b":b}
            song = {'artist_name':result['artist_name'],'song_title':result['song_title'],'viewCount':result['viewCount'],'likeCount':result['likeCount']}
            #print song
            saved += 1
            try:
                #print song['viewCount']
                # Only persist songs that actually have views.
                if song['viewCount'] > 0:
                    save_distances(song)
                #print song
            except:
                print "Couldn't save distances."
    print "Saved {0:,d} distance with {1:,d} remaining".format(saved, len_needed_rows-saved)
def cloud_status(jids):
    """Return a Counter of the statuses reported for the given job ids."""
    return Counter(cloud.status(jids))
def cloud_status(jids):
    """Return a Counter mapping each PiCloud job status to its frequency."""
    # Counter consumes the status iterable directly -- the manual tally
    # loop is unnecessary, and this matches the sibling cloud_status.
    return Counter(cloud.status(jids))
def done_jobs(jobs):
    '''Retrieve the results of the finished jobs from PiCloud.

    jobs: iterable of PiCloud job ids.
    Returns a list of results for the jobs whose status is 'done'.
    '''
    # BUG FIX: the original evaluated list(jobs) twice; a single-pass
    # iterator would be exhausted by the first call and the zip below
    # would then see no job ids at all.
    job_list = list(jobs)
    statuses = cloud.status(job_list)
    finished = [jid for jid, state in zip(job_list, statuses) if state == 'done']
    return list(cloud.iresult(finished))
''' Created on Sep 14, 2012 @author: mudassar ''' import time import cloud from etl.config.drivers.picloud import setcloudkey def testruntime(**kwargs): i=0 while i!=5: cloud.cloud.cloudLog.info("test run time") print "test run time" time.sleep(1) i+=1 if __name__ == '__main__': setcloudkey() jobid=cloud.call(testruntime,a=1,_max_runtime=1,_label="TESTRUNTIME") cloud.join(jobid) print cloud.status(jobid) pass
# IPython log file import exposure exposure() exposure.exposure() import cloud jid = cloud.call(exposure) jid = cloud.call(exposure.exposure) print jid cloud.status() cloud.status(jid) cloud.result(jid) exposure.exposure() exposure.exposure(1024) exposure.exposure(512) exposure.exposure(1024) args = 1024 N = 100 zip([args]*N) jids = cloud.map(exposure.exposure, *zip(*([args]*N))) zip(args*N) zip([args]*N) jids = cloud.map(exposure.exposure, zip(([args]*N))) jids.status() jids cloud.status(jids) cloud.result(jids) jids = cloud.map(exposure.exposure, *zip(([args]*N))) cloud.status(jids) cloud.result(jids) jids = cloud.map(exposure.exposure, *zip(*([args]*N)))
def square(x): return x*x # demonstration of cloud.call() import cloud jid = cloud.call(square,3) print 'Job Id::', jid print 'Job status',cloud.status(jid) print 'Result',cloud.result(jid)
import cloud # print the status of a job known to exist status = cloud.status(1) print status
def test_exception2():
    '''Calling cloud.status with no argument at all should raise TypeError.'''
    cloud.status()
def test_multiply():
    """A trivial lambda job should complete and reach the 'done' status."""
    jid = cloud.call(lambda: 3 * 3)
    # Block until the result is available, then the status must be done.
    cloud.result(jid)
    assert cloud.status(jid) == "done"
# look at how long it takes to run locally %time segment_stats(valid_segments[0], None) # <codecell> # here's how to run it on PiCloud # Prerequisite: http://docs.picloud.com/primer.html <--- READ THIS AND STUDY TO REFRESH YOUR MEMORY import cloud jid = cloud.call(segment_stats, '1346823845675', None, _env='Working_with_Open_Data') # <codecell> # pull up status -- refresh until done cloud.status(jid) # <codecell> # this will block until job is done or errors out cloud.join(jid) # <codecell> # get your result cloud.result(jid) # <codecell> # get some basic info