def spawn_tasks(self):
    """ This spawns ipython ipengine tasks.

    NOTE: These ipython tasks are intended to be run on a single machine (transx).
    """
    # This class is just loaded for the next simple method (really, that method
    # could exist elsewhere and less initialization would be needed):
    Get_Classifications_For_Ptf_Srcid = GetClassificationsForPtfSrcid(schema_str=self.schema_str)
    total_srcid_list = Get_Classifications_For_Ptf_Srcid.retrieve_ptf_variable_sources()
    # KLUDGE: unfortunately we need to reinitialize the taskclient due to memory leaks in a primary class.
    list_incr = 5
    for i_low in xrange(0, len(total_srcid_list), list_incr):
        short_srcid_list = total_srcid_list[i_low:i_low + list_incr]
        exec_str = """schema_str="%s"
for src_id in srcid_list:
    try:
        #is_already_ingested = Get_Classifications_For_Ptf_Srcid.check_srcid_ingested(src_id, schema_str)
        #if not is_already_ingested:
        if True:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass  # skipping this srcid
""" % (self.schema_str)
        taskid = self.tc.run(client.StringTask(exec_str,
                                               push={'srcid_list': short_srcid_list}))
def visitLink(self, url):
    if url not in self.allLinks:
        self.allLinks.append(url)
        if url.startswith(self.site):
            print ' ', url
            self.linksWorking[url] = self.tc.run(
                client.StringTask('links = fetchAndParse(url)',
                                  pull=['links'], push={'url': url}))
def time_twisted(nmessages, t=0, f=wait):
    from IPython.kernel import client as kc
    client = kc.TaskClient()
    if f is wait:
        s = "import time; time.sleep(%f)" % t
        task = kc.StringTask(s)
    elif f is echo:
        t = np.random.random(int(t / 8))
        s = "s=t"
        task = kc.StringTask(s, push=dict(t=t), pull=['s'])
    else:
        raise ValueError("f must be wait or echo")
    # do one ping before starting timing
    client.barrier(client.run(task))
    tic = time.time()
    tids = []
    for i in xrange(nmessages):
        tids.append(client.run(task))
    lap = time.time()
    client.barrier(tids)
    toc = time.time()
    return lap - tic, toc - tic
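# Hypothetical driver (not part of the original benchmark): compare the
# submit time against the total round-trip time for batches of short sleep
# tasks.  It only uses the time_twisted()/wait names defined or referenced above.
if __name__ == '__main__':
    for n in (10, 100):
        submit, total = time_twisted(n, t=0.01, f=wait)
        print "%4i sleep tasks: submitted in %.2fs, completed in %.2fs" % (n, submit, total)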
def parallel_populate_mysql_with_initial_tutor_sources(self, fpath_list,
                                                       test_aitc=None):
    """ This takes fpaths to TUTOR vosource.xmls and adds each source's
    (srcid, science_class) to the class table in parallel using IPython1.
    """
    if test_aitc is not None:
        # For linear TESTING without IPython1 / parallelization:
        for fpath in fpath_list:
            test_aitc.insert_vosource_info_into_table(fpath)
        return
    from IPython.kernel import client
    tc = client.TaskClient()
    for fpath in fpath_list:
        exec_str = "aitc.insert_vosource_info_into_table('%s')" % (fpath)
        taskid = tc.run(client.StringTask(exec_str))
        self.running_ingest_tasks.append(taskid)
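# Hypothetical companion method (not in the original class): block until the
# ingest tasks queued above have all run, then collect their results.  It uses
# only TaskClient calls already shown in these snippets (barrier / get_task_result).
def wait_for_running_ingest_tasks(self, tc):
    tc.barrier(self.running_ingest_tasks)        # block until every queued task id has finished
    return [tc.get_task_result(tid, block=True)  # gather the per-task results (or failures)
            for tid in self.running_ingest_tasks]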
def generate_classifications(self, schema_str=""):
    """ Generate classifications for PTF variable sources using the given
    class schema, spawning the work as IPython tasks.
    """
    from IPython.kernel import client  # 20091202 added

    ##### Do classifications using schema:
    import get_classifications_for_ptf_srcid_and_class_schema
    IpythonTaskController = get_classifications_for_ptf_srcid_and_class_schema.\
                                     Ipython_Task_Controller(schema_str=schema_str)
    IpythonTaskController.initialize_ipengines()
    #IpythonTaskController.spawn_tasks()

    srcid_list = []
    select_str = ('SELECT tcp_srcid FROM ' + self.pars['lookup_tablename'] +
                  ' WHERE tcp_srcid IS NOT NULL AND (class_type LIKE "%rrl%" '
                  'OR class_type LIKE "%EB%" OR class_type LIKE "%epheid%")')
    self.cursor.execute(select_str)
    rows = self.cursor.fetchall()
    for row in rows:
        srcid_list.append(row[0])

    list_incr = 5
    for i_low in xrange(0, len(srcid_list), list_incr):
        short_srcid_list = srcid_list[i_low:i_low + list_incr]
        if 0:
            ### For debugging only:
            import classification_interface
            import plugin_classifier
            import get_classifications_for_ptf_srcid_and_class_schema
            Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str=schema_str)
            Get_Classifications_For_Ptf_Srcid.main(src_id=short_srcid_list[0])
        exec_str = """schema_str="%s"
for src_id in srcid_list:
    try:
        if True:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass  # skipping this srcid
""" % (schema_str)
        taskid = IpythonTaskController.tc.run(client.StringTask(exec_str,
                                                                push={'srcid_list': short_srcid_list}))
    IpythonTaskController.wait_for_tasks_to_finish()
def main(self):
    """ Main function for testing. """
    # This tests the multi-engine interface:
    mec = client.MultiEngineClient()
    exec_str = """import os
os.environ['TCP_SEX_BIN']=os.path.expandvars('$HOME/bin/sex')
os.environ['TCP_WCSTOOLS_DIR']=os.path.expandvars('$HOME/src/install/wcstools-3.6.4/bin/')
os.environ['TCP_DIR']=os.path.expandvars('$HOME/src/TCP/')
os.environ['TCP_DATA_DIR']=os.path.expandvars('$HOME/scratch/TCP_scratch/')
os.environ['CLASSPATH']=os.path.expandvars('$HOME/src/install/weka-3-5-7/weka.jar')
"""
    #if os.path.exists(os.path.expandvars("$HOME/.ipython/custom_configs")): execfile(os.path.expandvars("$HOME/.ipython/custom_configs"))
    mec.execute(exec_str)

    # This tests the task-client interface:
    tc = client.TaskClient()
    task_list = []
    n_iters_total = 8
    n_iters_per_clear = 10
    for i in xrange(n_iters_total):
        task_str = """cat = os.getpid()"""  # os.getpid() # os.environ
        taskid = tc.run(client.StringTask(task_str, pull="cat"))
        task_list.append(taskid)
        ### NOTE: This can be used to thin down the ipcontroller memory storage of
        ###       finished tasks, but afterwards you cannot retrieve values (below):
        #if (i % n_iters_per_clear == 0):
        #    tc.clear()
    print '!!! NUMBER OF TASKS STILL SCHEDULED: ', tc.queue_status()['scheduled']
    for i, taskid in enumerate(task_list):
        ### NOTE: The following retrieval doesn't work if
        ###       tc.clear() was called earlier:
        task_result = tc.get_task_result(taskid, block=True)
        print task_result['cat']
    print 'done'
    print tc.queue_status()
def spawn_tasks__crap2(self):
    """ This spawns ipython ipengine tasks.

    NOTE: These ipython tasks are intended to be run on a single machine (transx).
    """
    # This class is just loaded for the next simple method (really, that method
    # could exist elsewhere and less initialization would be needed):
    Get_Classifications_For_Ptf_Srcid = GetClassificationsForPtfSrcid(schema_str=self.schema_str)
    total_srcid_list = Get_Classifications_For_Ptf_Srcid.retrieve_ptf_variable_sources()
    # KLUDGE: unfortunately we need to reinitialize the taskclient due to memory leaks in a primary class.
    #list_incr = 5
    #for i_low in xrange(0, len(total_srcid_list), list_incr):
    #    short_srcid_list = total_srcid_list[i_low:i_low + list_incr]
    if 1:
        short_srcid_list = total_srcid_list
        exec_str = """schema_str="%s"
import os, sys
import classification_interface
import plugin_classifier
import ptf_master
import MySQLdb
import get_classifications_for_ptf_srcid_and_class_schema
Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str=schema_str)
for src_id in srcid_list:
    try:
        is_already_ingested = Get_Classifications_For_Ptf_Srcid.check_srcid_ingested(src_id, schema_str)
        if not is_already_ingested:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass  # skipping this srcid
del Get_Classifications_For_Ptf_Srcid
""" % (self.schema_str)
        taskid = self.tc.run(client.StringTask(exec_str,
                                               push={'srcid_list': short_srcid_list},
                                               clear_after=True))
task_string = """\
op = MCOptionPricer(S,K,sigma,r,days,paths)
op.run()
vp, ap, vc, ac = op.vanilla_put, op.asian_put, op.vanilla_call, op.asian_call
"""

# Create arrays of strike prices and volatilities
K_vals = N.linspace(90.0, 100.0, 5)
sigma_vals = N.linspace(0.0, 0.2, 5)

# Submit tasks
taskids = []
for K in K_vals:
    for sigma in sigma_vals:
        t = client.StringTask(task_string,
                              push=dict(sigma=sigma, K=K),
                              pull=('vp', 'ap', 'vc', 'ac', 'sigma', 'K'))
        taskids.append(tc.run(t))

print "Submitted tasks: ", taskids

# Block until tasks are completed
tc.barrier(taskids)

# Get the results
results = [tc.get_task_result(tid) for tid in taskids]

# Assemble the result
vc = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
vp = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
ac = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
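# A possible continuation (a sketch; the original snippet ends mid-assembly):
# allocate the remaining result array, unpack each task's pulled namespace
# into the flat arrays, then reshape them onto the (K, sigma) grid.
ap = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
for i, tr in enumerate(results):
    vp[i], ap[i] = tr.ns.vp, tr.ns.ap    # pulled vanilla/asian put prices
    vc[i], ac[i] = tr.ns.vc, tr.ns.ac    # pulled vanilla/asian call prices
for arr in (vp, ap, vc, ac):
    arr.shape = (K_vals.shape[0], sigma_vals.shape[0])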
#!/usr/bin/env python
# encoding: utf-8

from IPython.kernel import client
import time

tc = client.TaskClient()
mec = client.MultiEngineClient()

mec.execute('import time')

for i in range(24):
    tc.run(client.StringTask('time.sleep(1)'))

for i in range(6):
    time.sleep(1.0)
    print "Queue status (verbose=False)"
    print tc.queue_status()

for i in range(24):
    tc.run(client.StringTask('time.sleep(1)'))

for i in range(6):
    time.sleep(1.0)
    print "Queue status (verbose=True)"
    print tc.queue_status(True)

for i in range(12):
    tc.run(client.StringTask('time.sleep(2)'))

print "Queue status (verbose=True)"
def simulate(self):
    ep = self.expParams
    ep.samplingTime = int(ep.Tsim / (200 * ep.DTsim))
    m = self.models
    r = self.recordings

    # Run simulation
    print 'Running lsms:', datetime.today().strftime('%x %X')
    t0 = datetime.today()
    self.net.reset()

    cmd = """
exper.reset()
exper.models.input.resetStimulus(SUD)
exper.run('oneRun', saveData=False)
resp, inp = exper.getOutput()
"""
    tc = self.IPcluster.getTaskControllerClient()
    tids = []
    print "Preparing liquid responses for training phase..."
    for i in range(len(m.SudList)):
        tids.append(tc.run(kernel.StringTask(cmd, pull=['resp', 'inp'],
                                             push=dict(SUD=m.SudList[i]))))
    print len(tids), 'Tasks started...'
    tc.barrier(tids)

    m.trainResp = []
    m.trainInp = []
    for tid in range(len(tids)):
        res = tc.get_task_result(tids[tid])
        if res.failure is not None:
            res.failure.printDetailedTraceback()
            res.failure.raiseException()
        m.trainResp.append(res.results['resp'])
        m.trainInp.append(res.results['inp'])
    print "Done."

    print "number of synapses of the first circuit is ", m.trainResp[0].numSynapses
    r.numSynapses = m.trainResp[0].numSynapses

    print "Shutting down cluster ..."
    self.IPcluster.stop()
    print "Done"

    currEpoch = 0
    for currPhase in range(len(m.sudListSegments)):
        m.phasePreparations[currPhase]()
        print "Starting phase :", m.sudListSegments[currPhase][0]
        for epochWithinPhase in range(m.sudListSegments[currPhase][1],
                                      m.sudListSegments[currPhase][2]):
            if currEpoch % 10 == 0:
                stdout.write(str(currEpoch))
            else:
                stdout.write(".")
            m.rewardInput.reset(currEpoch, m.trainResp[currEpoch], m.SudList[currEpoch][2])
            m.readout.diminishWeights()
            self.net.advance(int(ep.trialT / ep.DTsim))
            currEpoch += 1
        print "done."

    t1 = datetime.today()
    print '\nDone.', (t1 - t0).seconds, 'sec CPU time for', ep.Tsim, 's simulation time'
    self.expParams.simDuration = (t1 - t0).seconds
""" A Distributed Hello world Ken Kinder <*****@*****.**> """ from IPython.kernel import client tc = client.TaskClient() mec = client.MultiEngineClient() mec.execute('import time') hello_taskid = tc.run(client.StringTask('time.sleep(3) ; word = "Hello,"', pull=('word'))) world_taskid = tc.run(client.StringTask('time.sleep(3) ; word = "World!"', pull=('word'))) print "Submitted tasks:", hello_taskid, world_taskid print tc.get_task_result(hello_taskid,block=True).ns.word, tc.get_task_result(world_taskid,block=True).ns.word
def populate_mysql_with_iterative_classes_for_sources(self, aitc,
                                                      vsrc_xml_fpath_list=[],
                                                      do_nonparallel=False):
    """ Here we iteratively add the individual epochs for a vosource,
    classify, and enter the results into the analysis MySQL table.
    """
    # TODO: Here we retrieve all relevant vosource.xml fpaths from the mysql table
    if len(vsrc_xml_fpath_list) > 0:
        vosource_fpath_list = vsrc_xml_fpath_list
    else:
        select_str = "SELECT fpath FROM %s" % (self.pars['table_name'])
        aitc.cursor.execute(select_str)
        results = aitc.cursor.fetchall()
        vosource_fpath_list = []
        for result in results:
            vosource_fpath_list.append(result[0])

    if do_nonparallel:
        import ptf_master
        #special_vosource_fpath_list = []
        #for elem in special_vosource_fpath_list:
        #    try:
        #        vosource_fpath_list.pop(elem)
        #    except:
        #        pass
        #special_vosource_fpath_list.extend(vosource_fpath_list)
        #for i,fpath in enumerate(special_vosource_fpath_list):
        for i, fpath in enumerate(vosource_fpath_list):
            ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
                                                  case_simulate_ptf_stream_using_vosource=True,
                                                  vosource_xml_fpath=fpath,
                                                  case_poll_for_recent_postgre_table_entries=False,
                                                  insert_row_into_iterative_class_probs=True)
            print "Done: VOSource %d of %d" % (i, len(vosource_fpath_list))
        return (None, None)

    ##### For debugging using cProfile, kcachegrind, etc:
    #p = cProfile.Profile()
    #p.run("""
    #import ptf_master
    #for i,fpath in enumerate(%s):
    #    ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
    #        case_simulate_ptf_stream_using_vosource=True,
    #        vosource_xml_fpath=fpath,
    #        case_poll_for_recent_postgre_table_entries=False,
    #        insert_row_into_iterative_class_probs=True)""" % (str(vosource_fpath_list[:14])))
    #k = lsprofcalltree.KCacheGrind(p)
    #data = open('/tmp/prof_14.kgrind', 'w+')
    #k.output(data)
    #data.close()
    #sys.exit()

    from ipython1.kernel import client
    tc = client.TaskClient((self.pars['ipython_host_ip'],
                            self.pars['ipython_taskclient_port']))
    for fpath in vosource_fpath_list:
        exec_str = """ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
                      case_simulate_ptf_stream_using_vosource=True,
                      vosource_xml_fpath='%s',
                      case_poll_for_recent_postgre_table_entries=False,
                      insert_row_into_iterative_class_probs=True)
""" % (fpath)
        taskid = tc.run(client.StringTask(exec_str))
        self.running_ingest_tasks.append(taskid)
    #print tc.get_task_result(self.running_ingest_tasks[243], block=False)
    # Return (tc, fpath list) so the caller can poll which task threads are
    # still queued and which are finished:
    return (tc, vosource_fpath_list)
def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1)
    parser.set_defaults(tmax=60)
    parser.set_defaults(controller='localhost')
    parser.set_defaults(meport=10105)
    parser.set_defaults(tport=10113)

    parser.add_option("-n", type='int', dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-c", type='string', dest='controller',
                      help='the address of the controller')
    parser.add_option("-p", type='int', dest='meport',
                      help="the port on which the controller listens for the MultiEngine/RemoteController client")
    parser.add_option("-P", type='int', dest='tport',
                      help="the port on which the controller listens for the TaskClient client")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = client.MultiEngineClient()
    tc = client.TaskClient()
    print tc.task_controller
    rc.block = True
    nengines = len(rc.get_ids())
    rc.execute('from IPython.utils.timing import time')

    # the jobs should take a random time within a range
    times = [random.random() * (opts.tmax - opts.tmin) + opts.tmin
             for i in range(opts.n)]
    tasks = [client.StringTask("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print "executing %i tasks, totalling %.1f secs on %i engines" % (opts.n, stime, nengines)
    time.sleep(1)
    start = time.time()
    taskids = [tc.run(t) for t in tasks]
    tc.barrier(taskids)
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print "executed %.1f secs in %.1f secs" % (stime, ptime)
    print "%.3fx parallel performance on %i engines" % (scale, nengines)
    print "%.1f%% of theoretical max" % (100 * scale / nengines)
def main_ipython_cluster(self, noisify_attribs=[], ntrees=100, mtry=25,
                         nodesize=5, n_iters=23):
    """ Main() for Debug_Feature_Class_Dependence.
    Partially adapted from compare_randforest_classifers.py.

    Do training and cross-validation on just the Debosscher data, for speed:
      - parse the Debosscher arff
      - remove certain features
      - train/test the classifier using cross validation
      - store error rates for those removed features
    """
    try:
        from IPython.kernel import client
    except:
        pass
    tc = self.initialize_mec(client=client)

    result_dict = {}
    new_orig_feat_tups = []
    task_id_list = []
    for feat_name in noisify_attribs:
        tc_exec_str = """
new_orig_feat_tups = ''
task_randint = random.randint(0, 1000000000000)
classifier_base_dirpath = os.path.expandvars("$HOME/scratch/debug_feature_classifier_dependence/%d" % (task_randint))
os.system("mkdir -p %s" % (classifier_base_dirpath))
try:
    DebugFeatureClassDependence = debug_feature_classifier_dependence.Debug_Feature_Class_Dependence(pars={'algorithms_dirpath': os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/')})
    out_dict = DebugFeatureClassDependence.get_crossvalid_errors_for_single_arff(arff_fpath=pars['orig_arff_dirpath'], noisify_attribs=[feat_name], ntrees=ntrees, mtry=mtry, nodesize=nodesize, n_iters=n_iters, classifier_base_dirpath=classifier_base_dirpath, algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))
    orig_wa = numpy.average(out_dict['means'], weights=out_dict['stds'])
    DebugFeatureClassDependence.load_rpy2_rc(algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))
    out_dict = DebugFeatureClassDependence.get_crossvalid_errors_for_single_arff(arff_fpath=pars['new_arff_dirpath'], noisify_attribs=[feat_name], ntrees=ntrees, mtry=mtry, nodesize=nodesize, n_iters=n_iters, classifier_base_dirpath=classifier_base_dirpath, algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))
    new_wa = numpy.average(out_dict['means'], weights=out_dict['stds'])
    new_orig_feat_tups = (new_wa - orig_wa, feat_name, numpy.std(out_dict['means']))
except:
    new_orig_feat_tups = str(sys.exc_info())
"""
        taskid = tc.run(client.StringTask(tc_exec_str,
                                          push={'pars': pars,
                                                'feat_name': feat_name,
                                                'ntrees': ntrees,
                                                'mtry': mtry,
                                                'nodesize': nodesize,
                                                'n_iters': n_iters},
                                          pull='new_orig_feat_tups',
                                          retries=3))
        task_id_list.append(taskid)
        if 0:
            ### debug: this inspect.getmembers() only works if the task doesn't fail:
            time.sleep(60)
            temp = tc.get_task_result(taskid, block=False)
            import inspect
            for a, b in inspect.getmembers(temp):
                print a, b
            out_dict = temp.results.get('new_orig_feat_tups', None)
            import pdb
            pdb.set_trace()
            print
    ######
    new_orig_feat_tups = self.wait_for_task_completion(task_id_list=task_id_list, tc=tc)
    new_orig_feat_tups.sort()
    pprint.pprint(new_orig_feat_tups)
    import pdb
    pdb.set_trace()
    print
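# wait_for_task_completion() is referenced above but not shown in this snippet;
# a minimal sketch of such a helper (an assumption, not the original code),
# using only TaskClient calls already used in this method:
def wait_for_task_completion(self, task_id_list=[], tc=None):
    tc.barrier(task_id_list)    # block until all tasks have run (or failed)
    results = []
    for tid in task_id_list:
        tr = tc.get_task_result(tid, block=True)
        results.append(tr.results.get('new_orig_feat_tups', None))
    return results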
from IPython.kernel import client

tc = client.TaskClient()
rc = client.MultiEngineClient()

rc.push(dict(d=30))

cmd1 = """\
a = 5
b = 10*d
c = a*b*d
"""

t1 = client.StringTask(cmd1, clear_before=False, clear_after=True,
                       pull=['a', 'b', 'c'])
tid1 = tc.run(t1)
tr1 = tc.get_task_result(tid1, block=True)
tr1.raise_exception()
print "a, b: ", tr1.ns.a, tr1.ns.b
def spawn_off_arff_line_tasks(self, vosource_xml_dirpath):
    """ This spawns off ipython task clients which take vosource.xml fpaths
    and generate feature/class structure which will be used to create a
    .arff line.  The task results should be 'pulled' and then inserted into
    a final Weka .arff file.
    """
    ##### For testing:
    skipped_deb_srcids = [
        '12645', '12646', '12649', '12653', '12655', '12656', '12658', '12660', '12670', '12675', '12700', '12745', '12766',
        '12797', '12798', '12806', '12841', '12847', '12849', '12850', '12851', '12852', '12853', '12854', '12856', '12858',
        '12861', '12864', '12868', '12869', '12870', '12875', '12879', '12882', '12885', '12886', '12888', '12890', '12891',
        '12893', '12895', '12901', '12904', '12907', '12909', '12914', '12915', '12921', '12923', '12924', '12928', '12930',
        '12932', '12933', '12934', '12936', '12941', '12948', '12950', '12957', '12958', '12960', '12961', '12970', '13007',
        '13024', '13034', '13059', '13076', '13078', '13091', '13094', '13119', '13122', '13128', '13156', '13170', '13172',
        '13239', '13242', '13246', '13247', '13261', '13268', '13280', '13324', '13333', '13354', '13360', '13362', '13369',
        '13374', '13402', '13418', '13420', '13421', '13423', '13424', '13425', '13427', '13429', '13432', '13433', '13439',
        '13440', '13442', '13443', '13444', '13448', '13458', '13462', '13465', '13466', '13469', '13471', '13476', '13477',
        '13478', '13480', '13481', '13483', '13484', '13491', '13493', '13495', '13500', '13502', '13505', '13511', '13519',
        '13520', '13521', '13530', '13535', '13543', '13544', '13552', '13553', '13560', '13561', '13564', '13565', '13571',
        '13573', '13577', '13580', '13582', '13591', '13594', '13596', '13602', '13607', '13608', '13616', '13618', '13622',
        '13623', '13625', '13630', '13632', '13638', '13642', '13646', '13647', '13650', '13656', '13657', '13668', '13676',
        '13678', '13680', '13686', '13687', '13689', '13690', '13692', '13694', '13695', '13698', '13701', '13703', '13704',
        '13708', '13712', '13716', '13717', '13718', '13719', '13722', '13723', '13731', '13733', '13739', '13740', '13743',
        '13744', '13747', '13748', '13750', '13760', '13763', '13774', '13776', '13777', '13780', '13782', '13783', '13784',
        '13786', '13788', '13793', '13800', '13804', '13806', '13810', '13814', '13815', '13819', '13824', '13826', '13832',
        '13833', '13838', '13843', '13847', '13851', '13854', '13858', '13860', '13869', '13873', '13881', '13882', '13885',
        '13888', '13889', '13890', '13892', '13893', '13894', '13896', '13898', '13900', '13906', '13911', '13922', '13927',
        '13928', '13929', '13936', '13938', '13942', '13944', '13951', '13955', '13957', '13958', '13959', '13962', '13965',
        '13972', '13974', '13988', '13989', '13996', '13997', '13998', '14004', '14006', '14009', '14010', '14017', '14018',
        '14024', '14025', '14028', '14029', '14032', '14035', '14043', '14047', '14048', '14051', '14055', '14056', '14065',
        '14066', '14070', '14071', '14072', '14087', '14088', '14089', '14093', '14095', '14104', '14108', '14109', '14113',
        '14117', '14120', '14122', '14125', '14129', '14133', '14136', '14137', '14151', '14155', '14157', '14163', '14166',
        '14167', '14168', '14174', '14175', '14181', '14182', '14186', '14191', '14194', '14198', '14205', '14206', '14216',
        '14218', '14219', '14225', '14226', '14234', '14239', '14243', '14244', '14246', '14247', '14248', '14250', '14251',
        '14255', '14256', '14263', '14269', '14275', '14280', '14282']

    import dotastro_sciclass_tools
    dst = dotastro_sciclass_tools.Dotastro_Sciclass_Tools()
    dst.make_tutor_db_connection()
    #####

    xml_fpath_list = glob.glob(vosource_xml_dirpath + '/*xml')
    # KLUDGE: This can potentially load a lot of xml-strings into memory:
    for xml_fpath in xml_fpath_list:
        fname = xml_fpath[xml_fpath.rfind('/') + 1:xml_fpath.rfind('.')]
        # Seems OK: can we just use the filename rather than the sourceid?
        #           xml_fname[:xml_fname.rfind('.')]
        num = fname
        #srcid_xml_tuple_list.append((num, xml_fpath))
        #task_str = """cat = os.getpid()"""
        #taskid = self.tc.run(client.StringTask(task_str, pull="cat"))
        #time.sleep(1)
        #print self.tc.get_task_result(taskid, block=False).results

        ##### For testing:
        #if "100017522.xml" in xml_fpath:
        #    print "yo"
        if 0:
            import pdb
            pdb.set_trace()
            print
        num_orig_str = str(int(num) - 100000000)
        if num_orig_str in skipped_deb_srcids:
            #print num_orig_str
            select_str = ("select sources.source_id, sources.project_id, sources.source_name, "
                          "sources.class_id, sources.pclass_id, project_classes.pclass_name, "
                          "project_classes.pclass_short_name from Sources "
                          "join project_classes using (pclass_id) "
                          "where source_id = %s" % (num_orig_str))
            dst.cursor.execute(select_str)
            results = dst.cursor.fetchall()
            a = arffify.Maker(search=[],
                              skip_class=False,
                              local_xmls=True,
                              convert_class_abrvs_to_names=False,
                              flag_retrieve_class_abrvs_from_TUTOR=False,
                              dorun=False)
            out_dict = a.generate_arff_line_for_vosourcexml(num=str(num),
                                                            xml_fpath=xml_fpath)
            print '!!!', results[0]
        else:
            try:
                a = arffify.Maker(search=[],
                                  skip_class=False,
                                  local_xmls=True,
                                  convert_class_abrvs_to_names=False,
                                  flag_retrieve_class_abrvs_from_TUTOR=False,
                                  dorun=False)
                out_dict = a.generate_arff_line_for_vosourcexml(num=str(num),
                                                                xml_fpath=xml_fpath)
            except:
                print "barf on some xml:", xml_fpath
                #print xml_fpath
                #continue
        #####
        if 1:
            exec_str = """out_dict = a.generate_arff_line_for_vosourcexml(num="%s", xml_fpath="%s")
""" % (str(num), xml_fpath)
            #print exec_str
            try:
                taskid = self.tc.run(client.StringTask(exec_str,
                                                       pull='out_dict', retries=3))
                self.task_id_list.append(taskid)
            except:
                print "EXCEPT!: taskid=", taskid, exec_str