def wait_for_hit_response(puid, quid, cstage, request_params):
    """Register a HIT with the HIT server and poll CS_event for its result.

    ``request_params`` is sent with ``mdlib.httpsend``.  On an HTTP 200 reply
    the task link is announced to ``cstage.listener`` (when set) and the
    ``CS_event`` table is polled every ``poll4res_interval`` seconds, at most
    ``wait4res_duration / poll4res_interval`` times, for a ``regHIT`` row
    matching ``puid`` / ``quid``.  The whole request is repeated up to
    ``failover_retrylimit`` times; a 404 reply stops retrying at once.

    Returns:
        ``(msgstr, execres)``: the ``msg`` value of the matching row and
        ``True`` when a result arrived, otherwise ``("", False)``.
    """
    global failover_retrylimit, failover_interval, wait4res_duration, poll4res_interval
    global hit_host, hit_urlloc, cs_dbhost, cs_dbname, clock

    # check CS_event table to get the results from AMT.
    sqlstmt = "select msg from CS_event where action='regHIT' and pid={0} and qid={1}".format(puid, quid)
    max_polls = wait4res_duration / poll4res_interval

    cnt = 0
    while cnt < failover_retrylimit:
        # send a request to HITS
        resp = mdlib.httpsend(hit_host, hit_urlloc, request_params, clock)
        mdlib.log("* HIT request has been registered")

        if resp[0] == 200:
            # announce the hit task link to the listeners
            if cstage.listener is not None:
                announce_hittask_links(puid, quid, cstage.listener)

            scnt = 0
            while scnt < max_polls:
                hitdata = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
                if hitdata is not None:
                    mdlib.log("* HIT results have been received msg={0}".format(hitdata[0]))
                    return hitdata[0], True
                if scnt == 0:
                    mdlib.log("* waiting for HIT result, scnt={0}/{1}".format(scnt, max_polls))
                scnt += 1
                time.sleep(poll4res_interval)
        elif resp[0] == 404:
            mdlib.log('! will not retry since there is no service page (errcode=404)')
            break  # no more retry, and no point in sleeping first
        else:
            mdlib.log('! HITS returned error: code={0} msg={1}'.format(resp[0], resp[1]))

        # prepare next iteration; only back off when another attempt follows
        cnt += 1
        if cnt < failover_retrylimit:
            time.sleep(failover_interval)

    return "", False
def wait_for_hit_response(puid, quid, cstage, request_params):
    """Register a HIT with the HIT server and poll CS_event for its result.

    ``request_params`` is sent with ``mdlib.httpsend``.  On an HTTP 200 reply
    the task link is announced to ``cstage.listener`` (when set) and the
    ``CS_event`` table is polled every ``poll4res_interval`` seconds, at most
    ``wait4res_duration / poll4res_interval`` times, for a ``regHIT`` row
    matching ``puid`` / ``quid``.  The whole request is repeated up to
    ``failover_retrylimit`` times; a 404 reply stops retrying at once.

    Returns:
        ``(msgstr, execres)``: the ``msg`` value of the matching row and
        ``True`` when a result arrived, otherwise ``("", False)``.
    """
    global failover_retrylimit, failover_interval, wait4res_duration, poll4res_interval
    global hit_host, hit_urlloc, cs_dbhost, cs_dbname, clock

    # check CS_event table to get the results from AMT.
    sqlstmt = "select msg from CS_event where action='regHIT' and pid={0} and qid={1}".format(puid, quid)
    max_polls = wait4res_duration / poll4res_interval

    cnt = 0
    while cnt < failover_retrylimit:
        # send a request to HITS
        resp = mdlib.httpsend(hit_host, hit_urlloc, request_params, clock)
        mdlib.log("* HIT request has been registered")

        if resp[0] == 200:
            # announce the hit task link to the listeners
            if cstage.listener is not None:
                announce_hittask_links(puid, quid, cstage.listener)

            scnt = 0
            while scnt < max_polls:
                hitdata = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
                if hitdata is not None:
                    mdlib.log("* HIT results have been received msg={0}".format(hitdata[0]))
                    return hitdata[0], True
                if scnt == 0:
                    mdlib.log("* waiting for HIT result, scnt={0}/{1}".format(scnt, max_polls))
                scnt += 1
                time.sleep(poll4res_interval)
        elif resp[0] == 404:
            mdlib.log('! will not retry since there is no service page (errcode=404)')
            break  # no more retry, and no point in sleeping first
        else:
            mdlib.log('! HITS returned error: code={0} msg={1}'.format(resp[0], resp[1]))

        # prepare next iteration; only back off when another attempt follows
        cnt += 1
        if cnt < failover_retrylimit:
            time.sleep(failover_interval)

    return "", False
def main():
    """Fork one task-flow server per instance and kill them at the deadline.

    ``parse_args()`` supplies ``(taskpath, instances, deadline)``.  Each
    forked child runs ``mdserv.task_main(taskpath)`` and then terminates.
    The parent ticks once per second and, once ``deadline`` seconds have
    elapsed, sends SIGTERM to every child it created.
    """
    taskpath, instances, deadline = parse_args()
    print("{0} {1} {2}".format(taskpath, instances, deadline))

    chids = []
    for chidx in range(instances):
        pid = os.fork()
        if pid == 0:
            # child: run the task, then leave.  Without the explicit exit the
            # child would fall back into this loop and fork tasks of its own.
            mdserv.task_main(taskpath)
            raise SystemExit(0)
        # parent
        mdlib.log('* created {0}th task id={1}.'.format(chidx, pid))
        chids.append(pid)

    # parent will wait until the deadline (nothing to wait for without children)
    cnt = 0
    while chids:
        time.sleep(1)
        cnt += 1
        # deadline check; ">=" also terminates for fractional or
        # non-positive deadlines, which "==" would never match.
        if cnt >= deadline:
            mdlib.log('! deadline ({0} seconds) passes.'.format(deadline))
            for cid in chids:
                os.kill(cid, signal.SIGTERM)
                mdlib.log('* task id {0} is killed'.format(cid))
            break

    mdlib.log('* exits medusa task runner.')
def announce_hittask_links(pid, qid, listeners):
    """E-mail the registered HIT link to every address in ``listeners``.

    The link is the ``msg`` value of the ``gotoHIT`` row in ``CS_event`` for
    ``pid`` / ``qid``.  ``listeners`` is a comma-separated string; each piece
    is passed to ``mdlib.send_email`` as the recipient.  When no row exists
    only a warning is logged.
    """
    # get url.
    query = "select msg from CS_event where action='gotoHIT' and pid={0} and qid={1}".format(pid, qid)
    link_row = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, query)

    if link_row is None:
        mdlib.log("! No registered link, please verify Worker Manager is running properly")
        return

    # send it to the listeners
    for recipient in listeners.split(','):
        mdlib.send_email("Medusa HIT task has been posted", link_row[0], recipient)
    mdlib.log("* Weblink for HIT task has been announced: {0}".format(listeners))
def update_amt_cred(pid):
    """Copy the ``CS_globalvars`` rows of ``pid`` into the global ``ENV``.

    Every ``(envkey, envval)`` row selected for ``pid`` overwrites
    ``ENV[envkey]``.  The connection returned by ``mdlib.get_raw_mysql_data``
    is closed even if fetching or storing a row raises.
    """
    global cs_dbhost, cs_dbname
    global ENV

    sqlstmt = "select envkey,envval from CS_globalvars where pid={0}".format(pid)
    con, cur = mdlib.get_raw_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
    try:
        dat = cur.fetchone()
        while dat is not None:
            ENV[dat[0]] = dat[1]
            dat = cur.fetchone()
    finally:
        con.close()

    mdlib.log("* AMT credential has been updated.")
def pullout_db(puid, quid):
    """Look up the ``completeTask`` event of the current worker once.

    Queries ``CS_event`` for the row matching ``puid``, ``quid`` and
    ``ENV["W_WID"]``.

    Returns:
        ``(msg, True)`` when a row exists, otherwise ``(None, False)``.
    """
    global cs_dbhost, cs_dbname

    worker = ENV["W_WID"]
    query = "select msg from CS_event where action='completeTask' and pid={0} and qid={1} and userid='{2}'" \
        .format(puid, quid, worker)
    row = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, query)

    if row is None:
        return None, False

    mdlib.log("* RS results user={0}, msg={1}".format(worker, row[0]))
    return row[0], True
def parse_args():
    """Return the task file named on the command line.

    Scans ``sys.argv[1:]``: ``-h`` calls ``display_usage()``, and the first
    argument that does not start with ``-`` is taken as the file name.  When
    no such argument exists, or it is not an existing file, an error is
    logged and the process exits.
    """
    # input parameter parsing.
    filename = None  # stays None when no file argument is present
    for arg in sys.argv[1:]:
        if arg == "-h":
            display_usage()
        elif not arg.startswith('-'):
            filename = arg
            break

    if filename and os.path.isfile(filename):
        mdlib.log('* verified input file: [' + filename + ']')
    else:
        mdlib.log('! {0} is not a file'.format(filename))
        sys.exit()

    return filename
def wait_for_mst_response(puid, quid, rparams_dict, cstage_config_output):
    """Poll CS_event for a ``completeTask`` row, re-sending the request on timeout.

    Each round polls the ``CS_event`` table every ``poll4res_interval``
    seconds, at most ``wait4res_duration / poll4res_interval`` times, for a
    row matching ``puid``, ``quid`` and ``ENV["W_WID"]``.  A round that times
    out sleeps ``failover_interval`` seconds and re-sends ``rparams_dict``
    with ``mdlib.httpsend``.  At most ``failover_retrylimit`` rounds are run.

    ``cstage_config_output`` is accepted for interface compatibility and is
    not used.

    Returns:
        ``(msg, True)`` when a row was found, otherwise ``(None, False)``.
    """
    global failover_retrylimit, failover_interval, wait4res_duration, poll4res_interval
    global spc_host, spc_urlloc, cs_dbhost, cs_dbname, clock

    msgstr = None
    execres = False
    cnt = 0
    while cnt < failover_retrylimit and not execres:
        scnt = 0
        while scnt < (wait4res_duration / poll4res_interval):
            # check CS_event table to get the results from Aqua Clients.
            sqlstmt = "select msg from CS_event where action='completeTask' and pid={0} and qid={1} and userid='{2}'" \
                .format(puid, quid, ENV["W_WID"])
            retdat = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
            if retdat is not None:
                mdlib.log("* RS results user={0}, msg={1}".format(ENV["W_WID"], retdat[0]))
                msgstr = retdat[0]
                execres = True
                break
            if scnt == 0:
                mdlib.log("* waiting: cnt={0}/{1},{2}th try".format(
                    scnt, (wait4res_duration / poll4res_interval), cnt))
            scnt += 1
            time.sleep(poll4res_interval)

        cnt += 1
        if not execres:
            time.sleep(failover_interval)
            # retry
            ruser = ENV["W_WID"]
            resp = mdlib.httpsend(spc_host, spc_urlloc, rparams_dict, clock)
            if resp[0] == 200:
                mdlib.log("* retry sms cmd msg will be sent to userid={0}".format(ruser))
            else:
                # the original read the undefined name `res` here
                mdlib.log('! retry request failed, uid={0}, code={1}, msg={2}'.format(
                    ruser, resp[0], resp[1]))

    return msgstr, execres
def interpret_result(msgstrs, cstages, prev_params_dict):
    """Fold the stage results into the parameter dict for the next stages.

    Works on a copy of ``prev_params_dict`` and handles each stage by its
    ``stype``:

    * ``'recruit'``: stores the first ``#``-separated entry of ``msgstrs[0]``
      in ``ENV[config_output]`` (when ``config_output`` is set) and resets
      the parameter dict to empty.
    * ``'vote'``: each ``#``-separated answer of ``msgstrs[i]`` has two
      comma-separated fields whose text after the first ``-`` is a count.
      The answer becomes ``"0"`` when the first count is lower than the
      second, else ``"1"``; the bits are joined with ``|`` and stored under
      ``config_output``.
    * anything else: ``msgstrs[i]`` is stored under ``config_output``.

    Returns:
        The new parameter dict; ``prev_params_dict`` is not modified.

    Raises:
        ValueError: if a vote count is not an integer.
    """
    global ENV

    pdict = prev_params_dict.copy()
    for i, stage in enumerate(cstages):
        # amt_recruit
        if stage.stype == 'recruit':
            # NOTE(review): reads msgstrs[0] rather than msgstrs[i]; kept as
            # found, confirm this is intended for multi-stage levels.
            users = msgstrs[0].split('#')
            if stage.config_output and len(users) > 0:
                ENV[stage.config_output] = users[0]
            pdict = dict()
        # amt_curate
        elif stage.stype == 'vote':
            bits = []
            for answer in msgstrs[i].split('#'):
                yesno = answer.split(',')
                # compare as numbers: as strings "10" sorts before "9"
                yes_cnt = int(yesno[0].split('-')[1])
                no_cnt = int(yesno[1].split('-')[1])
                bits.append("0" if yes_cnt < no_cnt else "1")
            bitmask = "|".join(bits)
            mdlib.log("* bitmask: " + bitmask)
            if stage.config_output is not None:
                pdict[stage.config_output] = bitmask
        # anything else
        else:
            # put { '<output>' : 'uidlist' } into the params_dict..
            if stage.config_output is not None:
                pdict[stage.config_output] = msgstrs[i]

    return pdict
def parse_args():
    """Return the task file named on the command line and record the tracker id.

    Scans ``sys.argv[1:]``: ``-h`` calls ``display_usage()``, and the first
    argument that does not start with ``-`` is taken as the file name.  When
    further arguments follow the one where scanning stopped, the last
    argument is stored in the global ``task_tracker_id``.  When no file
    argument exists, or it is not an existing file, an error is logged and
    the process exits.
    """
    global task_tracker_id

    # input parameter parsing.
    filename = None  # stays None when no file argument is present
    last = len(sys.argv) - 1
    i = 0
    for i in range(1, len(sys.argv)):
        arg = sys.argv[i]
        if arg == "-h":
            display_usage()
        elif not arg.startswith('-'):
            filename = arg
            break

    # anything after the position where scanning stopped: last one is the id
    if i < last:
        task_tracker_id = sys.argv[last]
        print("task tracker id is " + task_tracker_id)

    if filename and os.path.isfile(filename):
        mdlib.log('* verified input file: [' + filename + ']')
    else:
        mdlib.log('! {0} is not a file'.format(filename))
        sys.exit()

    return filename
def wait_for_mst_response(puid, quid, rparams_dict, cstage_config_output):
    """Poll CS_event for a ``completeTask`` row, re-sending the request on timeout.

    Each round polls the ``CS_event`` table every ``poll4res_interval``
    seconds, at most ``wait4res_duration / poll4res_interval`` times, for a
    row matching ``puid``, ``quid`` and ``ENV["W_WID"]``.  A round that times
    out sleeps ``failover_interval`` seconds and re-sends ``rparams_dict``
    with ``mdlib.httpsend``.  At most ``failover_retrylimit`` rounds are run.

    ``cstage_config_output`` is accepted for interface compatibility and is
    not used.

    Returns:
        ``(msg, True)`` when a row was found, otherwise ``(None, False)``.
    """
    global failover_retrylimit, failover_interval, wait4res_duration, poll4res_interval
    global spc_host, spc_urlloc, cs_dbhost, cs_dbname, clock

    msgstr = None
    execres = False
    cnt = 0
    while cnt < failover_retrylimit and not execres:
        scnt = 0
        while scnt < (wait4res_duration / poll4res_interval):
            # check CS_event table to get the results from Aqua Clients.
            sqlstmt = "select msg from CS_event where action='completeTask' and pid={0} and qid={1} and userid='{2}'" \
                .format(puid, quid, ENV["W_WID"])
            retdat = mdlib.get_single_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
            if retdat is not None:
                mdlib.log("* RS results user={0}, msg={1}".format(ENV["W_WID"], retdat[0]))
                msgstr = retdat[0]
                execres = True
                break
            if scnt == 0:
                mdlib.log("* waiting: cnt={0}/{1},{2}th try".format(
                    scnt, (wait4res_duration / poll4res_interval), cnt))
            scnt += 1
            time.sleep(poll4res_interval)

        cnt += 1
        if not execres:
            time.sleep(failover_interval)
            # retry
            ruser = ENV["W_WID"]
            resp = mdlib.httpsend(spc_host, spc_urlloc, rparams_dict, clock)
            if resp[0] == 200:
                mdlib.log("* retry sms cmd msg will be sent to userid={0}".format(ruser))
            else:
                # the original read the undefined name `res` here
                mdlib.log('! retry request failed, uid={0}, code={1}, msg={2}'.format(
                    ruser, resp[0], resp[1]))

    return msgstr, execres
def task_main(taskpath):
    """Run the task-flow described by the XML file ``taskpath``.

    Reports ``"<pid> <task_tracker_id>"`` on ``soc2``, loads the server
    settings with ``get_env_values``, parses and type-checks the task, then
    executes the stages level by level.  All stages of one level are assumed
    to share the type of the first one:

    * ``HIT``: one ``regHIT`` request per stage, answered through
      ``wait_for_hit_response``.
    * ``SPC``: one request per stage (HTTP when ``anonymity`` is true, e-mail
      otherwise), then ``completeTask`` messages are read from ``soc2`` until
      one per stage has arrived.

    When every stage of a level succeeded the results are merged with
    ``interpret_result`` and the ``dst_success`` stages run next; otherwise
    the parameters are cleared and the ``dst_failure`` stages run.  ``soc2``
    is closed once no stage is left.
    """
    global data_base_url, spc_host, spc_urlloc, hit_host, hit_urlloc
    global wait4res_duration, poll4res_interval
    global failover_interval, failover_retrylimit
    global debug, anonymity, clock, ENV
    global soc2, cs_dbhost, cs_dbname
    global task_tracker_id

    mdlib.log("********************************")
    mdlib.log("* Task-Flow Server *")
    mdlib.log("********************************")
    mdlib.log("* starting task-flow server(TFS).")
    pid = os.getpid()
    print("++++pid+++++++" + str(pid))
    msend(soc2, str(pid) + " " + task_tracker_id)

    ######################################################
    # Program Start
    #
    # load envvars
    vdict = get_env_values(
        "BASE-URL-DATA,SPC-HOST,SPC-URI,HIT-HOST,HIT-URI,FAIL-SLEEP-INTERVAL,"
        "FAIL-RETRY-CNT,WAIT-DURATION-FOR-STAGE,CHECK-PERIOD-FOR-STAGE-OUTPUT")

    # Server configurations.
    data_base_url = vdict["BASE-URL-DATA"]
    spc_host = vdict["SPC-HOST"]
    spc_urlloc = vdict["SPC-URI"]
    hit_host = vdict["HIT-HOST"]
    hit_urlloc = vdict["HIT-URI"]

    # parameters on failover
    wait4res_duration = float(vdict["WAIT-DURATION-FOR-STAGE"])
    poll4res_interval = float(vdict["CHECK-PERIOD-FOR-STAGE-OUTPUT"])
    failover_interval = float(vdict["FAIL-SLEEP-INTERVAL"])
    failover_retrylimit = float(vdict["FAIL-RETRY-CNT"])

    # parse the task description
    fname = taskpath
    dict_stages, dict_conns, cur_stage = parse_xml(fname)
    cur_stages = [cur_stage]
    mdlib.log('* parsing completed.')

    mdlib.log('* starting parameter type checking.')
    mdchecker.check(dict_stages, dict_conns, cur_stage, ENV)
    mdlib.log("* [overhead] task interpretation delay: {0}".format(clock.stop()))
    mdlib.log('* done for type checking.')

    mdlib.log('* pid={pidval}'.format(pidval=pid))

    # update program state
    if debug:
        mdlib.update_program_state(cs_dbhost, cs_dbname, pid, "startTFS", ENV["APP_NAME"])

    # run program
    params_dict = dict()
    if ENV["G_VAR"] is not None:
        for g_var in ENV["G_VAR"].split(","):
            # split on the first "=" only so values may contain "="
            g_kv = g_var.split("=", 1)
            params_dict[g_kv[0]] = g_kv[1]
            mdlib.log("Global input var name {0} is {1}".format(g_kv[0], g_kv[1]))

    while len(cur_stages) > 0:
        # initialize
        nstages = len(cur_stages)
        exec_result = [False] * nstages
        respstr = [[]] * nstages

        # generate qid; sampling without replacement keeps the qids of one
        # level distinct, which the completion lookup below relies on.
        qid = random.sample(range(1000, 10000), nstages)
        for i in range(nstages):
            mdlib.log('* qid[{idx}]={qidval}'.format(idx=i, qidval=qid[i]))
        mid = random.randint(1000, 9999)

        cur_stage = cur_stages[0]  # assume that all stages in the same level has the same type.
        mdlib.log("* [overhead] task tracker latency amonst stages: {0}".format(clock.stop()))

        ########################################
        # Human Intelligence Task
        #
        if cur_stage.type == "HIT":
            mdlib.log('* invoking HIT [' + cur_stage.name + ']')

            # process amt credentials.
            if cur_stage.rid is None:
                rid = ENV["R_RID"]
                rkey = ENV["R_RKEY"]
            else:
                rid = ENV[cur_stage.rid]
                rkey = ENV[cur_stage.rkey]

            if cur_stage.wid is None:
                wid = ENV["W_WID"]
            else:
                wid = ENV[cur_stage.wid] if cur_stage.wid in ENV else cur_stage.wid

            # set data-owner
            dataowner = ENV["W_WID"] if len(params_dict) > 0 else 'None'

            # set data..
            if cur_stage.config_input in params_dict:
                data = params_dict[cur_stage.config_input]
            else:
                data = 'None'

            # make requests (aggregated)
            req_params = []
            for i in range(nstages):
                req_params.append({
                    'pid': pid,
                    'qid': qid[i],
                    'action': 'regHIT',
                    'qtype': cur_stage.stype,
                    'rid': rid,
                    'rkey': rkey,
                    'wid': wid,
                    'query': cur_stage.config_stmt,
                    'expiration': cur_stage.config_expiration,
                    'reward': cur_stage.config_reward,
                    'numusers': cur_stage.config_numusers,
                    'dataowner': dataowner,
                    'data': data,
                })

            # wait for the response
            for i in range(nstages):
                respstr[i], exec_result[i] = wait_for_hit_response(
                    pid, qid[i], cur_stages[i], req_params[i])

            mdlib.log("* [overhead] HIT task execution time: {0}".format(clock.stop()))

        #########################################
        # SPC (Sensing, Processing, Communicaion)
        #
        elif cur_stage.type == "SPC":
            # request route
            for i in range(nstages):
                mdlib.log('* SPC Task [' + cur_stages[i].name + ']')
                if i > 0:
                    time.sleep(3)

                # if <device> tag is set by email address, anonymity will be gone.
                # commanding emails will be delivered to the phones directly.
                if cur_stages[i].device is not None:
                    # partition() tolerates a tag without "=" (reported as a
                    # config error below) instead of failing to unpack.
                    dev_kind, _, addrs = cur_stages[i].device.partition("=")
                    mdlib.log("* <device> tag is set, content={0}".format(cur_stages[i].device))
                    if dev_kind != "EMAIL":
                        mdlib.log("! <device> config error. it should start with either 'EMAIL=' or 'AMT='")
                    else:
                        # NOTE(review): the parsed addresses in `addrs` are not
                        # used as recipients; the e-mail below goes to W_WID.
                        anonymity = False

                # make requests to mobile users
                uid = ENV["W_WID"]
                req_params = mst_request_params(pid, qid[i], cur_stages[i], uid, params_dict)
                if nstages > 1:
                    req_params['custom'] = "<multi>{0}|{1}/{2}</multi>{3}".format(
                        mid, i, nstages, req_params['custom'])

                if anonymity == True:
                    # case I) req. to AMT
                    resp = mdlib.httpsend(spc_host, spc_urlloc, req_params, clock)
                    if resp[0] != 200:
                        mdlib.log("! mst_request_http_error code={0}".format(resp[0]))
                        exit()
                else:
                    # case II) req. via direct email
                    req_params['custom'] = "<xml><pid>{0}</pid><qid>{1}</qid><amtid>{2}</amtid>{3}</xml>" \
                        .format(pid, qid[i], uid, req_params['custom'])
                    mdlib.send_email('Medusa commanding message', req_params['custom'], uid)

            # wait for job completions
            mdlib.log("* sms cmd msg has been sent to userid={0}".format(uid))
            ID = 0
            while ID < nstages:
                msg = mrecv(soc2)
                print("####recieved msg : " + msg)
                info = msg.split(',')
                if info[0] == 'completeTask':
                    ID = ID + 1
                    print("8888" + str(info[2]) + " " + str(qid))
                    done_qid = int(info[2])
                    if done_qid in qid:
                        print("8888 in if")
                        # store the result in the slot of the stage that
                        # finished; the request loop index is stale here.
                        idx = qid.index(done_qid)
                        respstr[idx], exec_result[idx] = pullout_db(info[1], info[2])
                        print("***" + str(respstr[idx]) + " " + str(exec_result[idx]))

                        if cur_stage.notification is not None:
                            print("batched notifcation ++++ : " + cur_stage.notification)
                            # NOTE(review): info[1] comes from the socket
                            # message and is interpolated into the SQL text.
                            sqlstmt = "select path,uid,muid from CS_data where pid = {0} and qid = {1}".format(
                                info[1], info[2])
                            con, cur = mdlib.get_raw_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
                            paths = []
                            muids = []
                            owner = None
                            try:
                                row = cur.fetchone()
                                while row is not None:
                                    if owner is None:
                                        owner = row[1]  # uid of the first row
                                    paths.append(row[0])
                                    muids.append(row[2])
                                    row = cur.fetchone()
                            finally:
                                con.close()

                            path = ",".join(paths)
                            if owner is None:
                                # no rows: fall back to the worker id
                                file_ids = ENV["W_WID"]
                            else:
                                file_ids = owner + '=' + '|'.join(muids)

                            d = [(file_ids, path)]
                            print(d)
                            req = urllib2.Request(cur_stage.notification, urllib.urlencode(d))
                            # the reply body is not needed; close it right away
                            urllib2.urlopen(req).close()
                else:
                    print("error, find out reason! " + msg)

            mdlib.log("* [overhead] SPC task execution time: {0}".format(clock.stop()))
        else:
            mdlib.log("! unknown stage type: " + cur_stage.type)
            exit()

        # judge if the operation was successful
        exec_resall = all(exec_result)

        #
        # augment passing parameter set.
        # { 'amt_wid' : { 'var_name' : 'uidlist', 'var_name' : 'uidlist', ... }, ... }
        #
        if exec_resall:
            params_dict = interpret_result(respstr, cur_stages, params_dict)
            update_amt_cred(pid)
        else:
            params_dict = dict()
        mdlib.log(params_dict)

        # decide next stage.
        if cur_stage.name in dict_conns:
            cstages = []
            for sg in cur_stages:
                # a sibling stage without a connector contributes nothing
                for c in dict_conns.get(sg.name, []):
                    st = dict_stages[c.dst_success if exec_resall else c.dst_failure]
                    if st not in cstages:
                        cstages.append(st)
            cur_stages = cstages
        else:
            cur_stages = []
    # end of while cur_stages

    mdlib.log("* shutting down task-flow server..")

    # update program state
    if debug:
        mdlib.update_program_state(cs_dbhost, cs_dbname, pid, "exitTFS", ENV["APP_NAME"])

    soc2.close()
def parse_xml(fname):
    """Parse a task description file into stages and connectors.

    Reads ``app/name`` and the ``app/*`` credential tags into the global
    ``ENV`` (each tag falls back to the current ``ENV`` value), sets the
    module-level defaults for timeout, data limit and command push, builds
    one ``TFStage`` per ``app/stage`` element and one ``TFConn`` per ``dst``
    of every ``app/connector`` element.

    Returns:
        ``(in_stages, in_conns, curstage)``: stages keyed by name, lists of
        connections keyed by source stage name, and the first stage found in
        the file (``None`` when the file has no stage).
    """
    global default_timeout, default_datalimit, default_cmdpush
    global ENV

    curstage = None
    in_stages = dict()
    in_conns = dict()

    doc = ET.parse(os.path.abspath(fname))
    ENV["APP_NAME"] = doc.find("app/name").text
    mdlib.log("* APP [" + ENV["APP_NAME"] + "] started")

    #
    # set user's amt credential
    #
    env_tags = (
        ("R_RID", "app/rrid"),
        ("R_RKEY", "app/rrkey"),
        ("R_WID", "app/rwid"),
        ("R_C2DM_KEY", "app/rc2dmkey"),
        ("W_RID", "app/wrid"),
        ("W_RKEY", "app/wrkey"),
        ("W_WID", "app/wwid"),
        ("W_C2DM_KEY", "app/wc2dmkey"),
        ("G_VAR", "app/gvar"),
    )
    for env_key, tag in env_tags:
        ENV[env_key] = mdlib.get_xmltag(tag, doc, ENV[env_key])

    default_timeout = mdlib.get_xmltag("app/timeout", doc, 5)
    default_datalimit = mdlib.get_xmltag("app/dlimit", doc, None)
    default_cmdpush = mdlib.get_xmltag("app/cmdpush", doc, default_cmdpush)

    # (stage attribute, xml tag); each tag falls back to the attribute's
    # current value on a fresh TFStage.
    stage_tags = (
        ("name", "name"),
        ("type", "type"),
        ("inst", "inst"),
        ("listener", "listener"),
        ("stype", "binary"),
        ("timeout", "timeout"),
        ("device", "device"),
        ("trigger", "trigger"),
        ("review", "review"),
        ("config_params", "config/params"),
        ("config_input", "config/input"),
        ("config_output", "config/output"),
        # AMT-related tags
        ("rid", "rid"),
        ("rkey", "rkey"),
        ("wid", "wid"),
        ("config_stmt", "config/stmt"),
        ("config_expiration", "config/expiration"),
        ("config_reward", "config/reward"),
        ("config_numusers", "config/numusers"),
    )

    for s in doc.findall("app/stage"):
        stage = TFStage()

        # serialized stage: drop "#..." comment lines, the whitespace
        # between tags, and trailing whitespace
        xmlstr = re.sub('#.*\n', '', ET.tostring(s))
        xmlstr = re.sub(r'>\s*<', '><', xmlstr)
        stage.xmlstr = re.sub(r'\s*$', '', xmlstr)

        for attr, tag in stage_tags:
            setattr(stage, attr, mdlib.get_xmltag(tag, s, getattr(stage, attr)))
        stage.notification = mdlib.get_xmltag('notification', s, None)

        # the first stage in the file is the entry stage
        if curstage is None:
            curstage = stage

        if stage.name in in_stages:
            mdlib.log("! duplicated stage name")
        else:
            in_stages[stage.name] = stage

    for c in doc.findall("app/connector"):
        src = c.find("src").text
        conlist = []
        for d in c.findall("dst"):
            con = TFConn()
            con.src = src
            con.dst_success = d.find("success").text
            con.dst_failure = d.find("failure").text
            conlist.append(con)
        # keyed by `src` directly, so a connector without <dst> cannot hit an
        # unbound loop variable; such a connector is simply not registered.
        if conlist:
            in_conns[src] = conlist

    return in_stages, in_conns, curstage
def parse_xml(fname):
    """Parse a task description file into stages and connectors.

    Reads ``app/name`` and the ``app/*`` credential tags into the global
    ``ENV`` (each tag falls back to the current ``ENV`` value), sets the
    module-level defaults for timeout, data limit and command push, builds
    one ``TFStage`` per ``app/stage`` element and one ``TFConn`` per ``dst``
    of every ``app/connector`` element.

    Returns:
        ``(in_stages, in_conns, curstage)``: stages keyed by name, lists of
        connections keyed by source stage name, and the first stage found in
        the file (``None`` when the file has no stage).
    """
    global default_timeout, default_datalimit, default_cmdpush
    global ENV

    curstage = None
    in_stages = dict()
    in_conns = dict()

    doc = ET.parse(os.path.abspath(fname))
    ENV["APP_NAME"] = doc.find("app/name").text
    mdlib.log("* APP [" + ENV["APP_NAME"] + "] started")

    #
    # set user's amt credential
    #
    env_tags = (
        ("R_RID", "app/rrid"),
        ("R_RKEY", "app/rrkey"),
        ("R_WID", "app/rwid"),
        ("R_C2DM_KEY", "app/rc2dmkey"),
        ("W_RID", "app/wrid"),
        ("W_RKEY", "app/wrkey"),
        ("W_WID", "app/wwid"),
        ("W_C2DM_KEY", "app/wc2dmkey"),
        ("G_VAR", "app/gvar"),
    )
    for env_key, tag in env_tags:
        ENV[env_key] = mdlib.get_xmltag(tag, doc, ENV[env_key])

    default_timeout = mdlib.get_xmltag("app/timeout", doc, 5)
    default_datalimit = mdlib.get_xmltag("app/dlimit", doc, None)
    default_cmdpush = mdlib.get_xmltag("app/cmdpush", doc, default_cmdpush)

    # (stage attribute, xml tag); each tag falls back to the attribute's
    # current value on a fresh TFStage.
    stage_tags = (
        ("name", "name"),
        ("type", "type"),
        ("inst", "inst"),
        ("listener", "listener"),
        ("stype", "binary"),
        ("timeout", "timeout"),
        ("device", "device"),
        ("trigger", "trigger"),
        ("review", "review"),
        ("config_params", "config/params"),
        ("config_input", "config/input"),
        ("config_output", "config/output"),
        # AMT-related tags
        ("rid", "rid"),
        ("rkey", "rkey"),
        ("wid", "wid"),
        ("config_stmt", "config/stmt"),
        ("config_expiration", "config/expiration"),
        ("config_reward", "config/reward"),
        ("config_numusers", "config/numusers"),
    )

    for s in doc.findall("app/stage"):
        stage = TFStage()

        # serialized stage: drop "#..." comment lines, the whitespace
        # between tags, and trailing whitespace
        xmlstr = re.sub('#.*\n', '', ET.tostring(s))
        xmlstr = re.sub(r'>\s*<', '><', xmlstr)
        stage.xmlstr = re.sub(r'\s*$', '', xmlstr)

        for attr, tag in stage_tags:
            setattr(stage, attr, mdlib.get_xmltag(tag, s, getattr(stage, attr)))
        stage.notification = mdlib.get_xmltag('notification', s, None)

        # the first stage in the file is the entry stage
        if curstage is None:
            curstage = stage

        if stage.name in in_stages:
            mdlib.log("! duplicated stage name")
        else:
            in_stages[stage.name] = stage

    for c in doc.findall("app/connector"):
        src = c.find("src").text
        conlist = []
        for d in c.findall("dst"):
            con = TFConn()
            con.src = src
            con.dst_success = d.find("success").text
            con.dst_failure = d.find("failure").text
            conlist.append(con)
        # keyed by `src` directly, so a connector without <dst> cannot hit an
        # unbound loop variable; such a connector is simply not registered.
        if conlist:
            in_conns[src] = conlist

    return in_stages, in_conns, curstage
elif sys.argv[i] == "-rid": rid = sys.argv[i+1]; elif sys.argv[i] == "-rkey": rkey = sys.argv[i+1]; #elif sys.argv[i][0] != '-': #print '! wrong options' #exit() return opt ###################################################### # Program Start # option = parse_args() if option == None: mdlib.log("! no options specified") display_usage() clock1 = StopWatch() mdlib.log("********************************") mdlib.log("* Reset Utility *") mdlib.log("********************************") # set configurations. vdict = mdlib.get_env_vars(mdlib.get_dbhost(), "medusa", "HIT-HOST,HIT-URI"); hit_host = vdict["HIT-HOST"]; hit_urlloc = vdict["HIT-URI"]; pid = 'all' qid = 'all'
def task_main(taskpath):
    """Run the task-flow described by the XML file ``taskpath``.

    Reports ``"<pid> <task_tracker_id>"`` on ``soc2``, loads the server
    settings with ``get_env_values``, parses and type-checks the task, then
    executes the stages level by level.  All stages of one level are assumed
    to share the type of the first one:

    * ``HIT``: one ``regHIT`` request per stage, answered through
      ``wait_for_hit_response``.
    * ``SPC``: one request per stage (HTTP when ``anonymity`` is true, e-mail
      otherwise), then ``completeTask`` messages are read from ``soc2`` until
      one per stage has arrived.

    When every stage of a level succeeded the results are merged with
    ``interpret_result`` and the ``dst_success`` stages run next; otherwise
    the parameters are cleared and the ``dst_failure`` stages run.  ``soc2``
    is closed once no stage is left.
    """
    global data_base_url, spc_host, spc_urlloc, hit_host, hit_urlloc
    global wait4res_duration, poll4res_interval
    global failover_interval, failover_retrylimit
    global debug, anonymity, clock, ENV
    global soc2, cs_dbhost, cs_dbname
    global task_tracker_id

    mdlib.log("********************************")
    mdlib.log("* Task-Flow Server *")
    mdlib.log("********************************")
    mdlib.log("* starting task-flow server(TFS).")
    pid = os.getpid()
    print("++++pid+++++++" + str(pid))
    msend(soc2, str(pid) + " " + task_tracker_id)

    ######################################################
    # Program Start
    #
    # load envvars
    vdict = get_env_values(
        "BASE-URL-DATA,SPC-HOST,SPC-URI,HIT-HOST,HIT-URI,FAIL-SLEEP-INTERVAL,"
        "FAIL-RETRY-CNT,WAIT-DURATION-FOR-STAGE,CHECK-PERIOD-FOR-STAGE-OUTPUT")

    # Server configurations.
    data_base_url = vdict["BASE-URL-DATA"]
    spc_host = vdict["SPC-HOST"]
    spc_urlloc = vdict["SPC-URI"]
    hit_host = vdict["HIT-HOST"]
    hit_urlloc = vdict["HIT-URI"]

    # parameters on failover
    wait4res_duration = float(vdict["WAIT-DURATION-FOR-STAGE"])
    poll4res_interval = float(vdict["CHECK-PERIOD-FOR-STAGE-OUTPUT"])
    failover_interval = float(vdict["FAIL-SLEEP-INTERVAL"])
    failover_retrylimit = float(vdict["FAIL-RETRY-CNT"])

    # parse the task description
    fname = taskpath
    dict_stages, dict_conns, cur_stage = parse_xml(fname)
    cur_stages = [cur_stage]
    mdlib.log('* parsing completed.')

    mdlib.log('* starting parameter type checking.')
    mdchecker.check(dict_stages, dict_conns, cur_stage, ENV)
    mdlib.log("* [overhead] task interpretation delay: {0}".format(clock.stop()))
    mdlib.log('* done for type checking.')

    mdlib.log('* pid={pidval}'.format(pidval=pid))

    # update program state
    if debug:
        mdlib.update_program_state(cs_dbhost, cs_dbname, pid, "startTFS", ENV["APP_NAME"])

    # run program
    params_dict = dict()
    if ENV["G_VAR"] is not None:
        for g_var in ENV["G_VAR"].split(","):
            # split on the first "=" only so values may contain "="
            g_kv = g_var.split("=", 1)
            params_dict[g_kv[0]] = g_kv[1]
            mdlib.log("Global input var name {0} is {1}".format(g_kv[0], g_kv[1]))

    while len(cur_stages) > 0:
        # initialize
        nstages = len(cur_stages)
        exec_result = [False] * nstages
        respstr = [[]] * nstages

        # generate qid; sampling without replacement keeps the qids of one
        # level distinct, which the completion lookup below relies on.
        qid = random.sample(range(1000, 10000), nstages)
        for i in range(nstages):
            mdlib.log('* qid[{idx}]={qidval}'.format(idx=i, qidval=qid[i]))
        mid = random.randint(1000, 9999)

        cur_stage = cur_stages[0]  # assume that all stages in the same level has the same type.
        mdlib.log("* [overhead] task tracker latency amonst stages: {0}".format(clock.stop()))

        ########################################
        # Human Intelligence Task
        #
        if cur_stage.type == "HIT":
            mdlib.log('* invoking HIT [' + cur_stage.name + ']')

            # process amt credentials.
            if cur_stage.rid is None:
                rid = ENV["R_RID"]
                rkey = ENV["R_RKEY"]
            else:
                rid = ENV[cur_stage.rid]
                rkey = ENV[cur_stage.rkey]

            if cur_stage.wid is None:
                wid = ENV["W_WID"]
            else:
                wid = ENV[cur_stage.wid] if cur_stage.wid in ENV else cur_stage.wid

            # set data-owner
            dataowner = ENV["W_WID"] if len(params_dict) > 0 else 'None'

            # set data..
            if cur_stage.config_input in params_dict:
                data = params_dict[cur_stage.config_input]
            else:
                data = 'None'

            # make requests (aggregated)
            req_params = []
            for i in range(nstages):
                req_params.append({
                    'pid': pid,
                    'qid': qid[i],
                    'action': 'regHIT',
                    'qtype': cur_stage.stype,
                    'rid': rid,
                    'rkey': rkey,
                    'wid': wid,
                    'query': cur_stage.config_stmt,
                    'expiration': cur_stage.config_expiration,
                    'reward': cur_stage.config_reward,
                    'numusers': cur_stage.config_numusers,
                    'dataowner': dataowner,
                    'data': data,
                })

            # wait for the response
            for i in range(nstages):
                respstr[i], exec_result[i] = wait_for_hit_response(
                    pid, qid[i], cur_stages[i], req_params[i])

            mdlib.log("* [overhead] HIT task execution time: {0}".format(clock.stop()))

        #########################################
        # SPC (Sensing, Processing, Communicaion)
        #
        elif cur_stage.type == "SPC":
            # request route
            for i in range(nstages):
                mdlib.log('* SPC Task [' + cur_stages[i].name + ']')
                if i > 0:
                    time.sleep(3)

                # if <device> tag is set by email address, anonymity will be gone.
                # commanding emails will be delivered to the phones directly.
                if cur_stages[i].device is not None:
                    # partition() tolerates a tag without "=" (reported as a
                    # config error below) instead of failing to unpack.
                    dev_kind, _, addrs = cur_stages[i].device.partition("=")
                    mdlib.log("* <device> tag is set, content={0}".format(cur_stages[i].device))
                    if dev_kind != "EMAIL":
                        mdlib.log("! <device> config error. it should start with either 'EMAIL=' or 'AMT='")
                    else:
                        # NOTE(review): the parsed addresses in `addrs` are not
                        # used as recipients; the e-mail below goes to W_WID.
                        anonymity = False

                # make requests to mobile users
                uid = ENV["W_WID"]
                req_params = mst_request_params(pid, qid[i], cur_stages[i], uid, params_dict)
                if nstages > 1:
                    req_params['custom'] = "<multi>{0}|{1}/{2}</multi>{3}".format(
                        mid, i, nstages, req_params['custom'])

                if anonymity == True:
                    # case I) req. to AMT
                    resp = mdlib.httpsend(spc_host, spc_urlloc, req_params, clock)
                    if resp[0] != 200:
                        mdlib.log("! mst_request_http_error code={0}".format(resp[0]))
                        exit()
                else:
                    # case II) req. via direct email
                    req_params['custom'] = "<xml><pid>{0}</pid><qid>{1}</qid><amtid>{2}</amtid>{3}</xml>" \
                        .format(pid, qid[i], uid, req_params['custom'])
                    mdlib.send_email('Medusa commanding message', req_params['custom'], uid)

            # wait for job completions
            mdlib.log("* sms cmd msg has been sent to userid={0}".format(uid))
            ID = 0
            while ID < nstages:
                msg = mrecv(soc2)
                print("####recieved msg : " + msg)
                info = msg.split(',')
                if info[0] == 'completeTask':
                    ID = ID + 1
                    print("8888" + str(info[2]) + " " + str(qid))
                    done_qid = int(info[2])
                    if done_qid in qid:
                        print("8888 in if")
                        # store the result in the slot of the stage that
                        # finished; the request loop index is stale here.
                        idx = qid.index(done_qid)
                        respstr[idx], exec_result[idx] = pullout_db(info[1], info[2])
                        print("***" + str(respstr[idx]) + " " + str(exec_result[idx]))

                        if cur_stage.notification is not None:
                            print("batched notifcation ++++ : " + cur_stage.notification)
                            # NOTE(review): info[1] comes from the socket
                            # message and is interpolated into the SQL text.
                            sqlstmt = "select path,uid,muid from CS_data where pid = {0} and qid = {1}".format(
                                info[1], info[2])
                            con, cur = mdlib.get_raw_mysql_data(cs_dbhost, cs_dbname, sqlstmt)
                            paths = []
                            muids = []
                            owner = None
                            try:
                                row = cur.fetchone()
                                while row is not None:
                                    if owner is None:
                                        owner = row[1]  # uid of the first row
                                    paths.append(row[0])
                                    muids.append(row[2])
                                    row = cur.fetchone()
                            finally:
                                con.close()

                            path = ",".join(paths)
                            if owner is None:
                                # no rows: fall back to the worker id
                                file_ids = ENV["W_WID"]
                            else:
                                file_ids = owner + '=' + '|'.join(muids)

                            d = [(file_ids, path)]
                            print(d)
                            req = urllib2.Request(cur_stage.notification, urllib.urlencode(d))
                            # the reply body is not needed; close it right away
                            urllib2.urlopen(req).close()
                else:
                    print("error, find out reason! " + msg)

            mdlib.log("* [overhead] SPC task execution time: {0}".format(clock.stop()))
        else:
            mdlib.log("! unknown stage type: " + cur_stage.type)
            exit()

        # judge if the operation was successful
        exec_resall = all(exec_result)

        #
        # augment passing parameter set.
        # { 'amt_wid' : { 'var_name' : 'uidlist', 'var_name' : 'uidlist', ... }, ... }
        #
        if exec_resall:
            params_dict = interpret_result(respstr, cur_stages, params_dict)
            update_amt_cred(pid)
        else:
            params_dict = dict()
        mdlib.log(params_dict)

        # decide next stage.
        if cur_stage.name in dict_conns:
            cstages = []
            for sg in cur_stages:
                # a sibling stage without a connector contributes nothing
                for c in dict_conns.get(sg.name, []):
                    st = dict_stages[c.dst_success if exec_resall else c.dst_failure]
                    if st not in cstages:
                        cstages.append(st)
            cur_stages = cstages
        else:
            cur_stages = []
    # end of while cur_stages

    mdlib.log("* shutting down task-flow server..")

    # update program state
    if debug:
        mdlib.update_program_state(cs_dbhost, cs_dbname, pid, "exitTFS", ENV["APP_NAME"])

    soc2.close()