Example #1
    def spawn_tasks(self):
        """ This spawns ipython ipengine tasks

        NOTE: These ipython tasks are intended to be run on a single machine (transx)
        """
        Get_Classifications_For_Ptf_Srcid = GetClassificationsForPtfSrcid(
            schema_str=self.schema_str)
        # This instance is only needed for the next method call (which could
        # arguably live elsewhere and require less initialization):
        total_srcid_list = Get_Classifications_For_Ptf_Srcid.retrieve_ptf_variable_sources()

        # KLUDGE: unfortunately we need to reinitialize taskclient due to memory leaks in a primary class.
        list_incr = 5
        for i_low in xrange(0, len(total_srcid_list), list_incr):
            short_srcid_list = total_srcid_list[i_low:i_low + list_incr]
            exec_str = """schema_str="%s"
for src_id in srcid_list:
    try:
        #is_already_ingested = Get_Classifications_For_Ptf_Srcid.check_srcid_ingested(src_id, schema_str)
        #if not is_already_ingested:
        if True:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass # skipping this srcid""" % (self.schema_str)
            taskid = self.tc.run(client.StringTask(
                exec_str, push={'srcid_list': short_srcid_list}))
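This first example shows the recurring pattern in these snippets: slice a long
source-id list into small batches, then `push` each batch into the engine
namespace so the executed string can loop over `srcid_list`. A minimal sketch
of just that pattern, assuming an ipcontroller and its ipengines are already
running (`process_one` is a hypothetical stand-in for the real per-source work):

from IPython.kernel import client

tc = client.TaskClient()
all_ids = range(100)   # stand-in for total_srcid_list
batch_size = 5         # mirrors list_incr above
exec_str = "results = [process_one(i) for i in srcid_list]"  # process_one is hypothetical
for low in xrange(0, len(all_ids), batch_size):
    batch = all_ids[low:low + batch_size]
    tc.run(client.StringTask(exec_str, push={'srcid_list': batch}))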
Example #2
    def visitLink(self, url):
        if url not in self.allLinks:
            self.allLinks.append(url)
            if url.startswith(self.site):
                print '    ', url
                self.linksWorking[url] = self.tc.run(
                    client.StringTask('links = fetchAndParse(url)',
                                      pull=['links'],
                                      push={'url': url}))
Example #3
def time_twisted(nmessages, t=0, f=wait):
    # `wait` and `echo` are assumed to be module-level helper functions
    import time
    import numpy as np
    from IPython.kernel import client as kc
    client = kc.TaskClient()
    if f is wait:
        s = "import time; time.sleep(%f)"%t
        task = kc.StringTask(s)
    elif f is echo:
        t = np.random.random(t/8)
        s = "s=t"
        task = kc.StringTask(s, push=dict(t=t), pull=['s'])
    else:
        raise ValueError("f must be wait or echo")
    # do one ping before starting timing
    client.barrier(client.run(task))
    tic = time.time()
    tids = []
    for i in xrange(nmessages):
        tids.append(client.run(task))
    lap = time.time()
    client.barrier(tids)
    toc = time.time()
    return lap-tic, toc-tic
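A hypothetical pair of calls to this benchmark, assuming the module-level
`wait` and `echo` helpers it expects are defined; the first times 100 short
sleep tasks, the second pushes ~1 KB of random floats per task:

submit_time, total_time = time_twisted(100, t=0.01)           # 100 sleep(0.01) tasks
submit_time, total_time = time_twisted(100, t=1024, f=echo)   # 100 echo tasks, ~1 KB each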
Example #4
    def parallel_populate_mysql_with_initial_tutor_sources(self, fpath_list,
                                                           test_aitc=None):
        """ This takes fpaths to TUTOR Vosource.xmls and adds each source's
        srcid, science_class to class table in parallel using Ipython1. 
        
        """
        if test_aitc != None:
            # for linear TESTING without Ipython1 / parallelization:
            for fpath in fpath_list:
                test_aitc.insert_vosource_info_into_table(fpath)
            return

        from IPython.kernel import client
        tc = client.TaskClient()

        for fpath in fpath_list:
            exec_str = "aitc.insert_vosource_info_into_table('%s')" % (fpath)
            taskid = tc.run(client.StringTask(exec_str))
            self.running_ingest_tasks.append(taskid)
    def generate_classifications(self, schema_str=""):
        """
        """
        from IPython.kernel import client # 20091202 added
        ##### Do classifications using schema
        import get_classifications_for_ptf_srcid_and_class_schema
        IpythonTaskController = get_classifications_for_ptf_srcid_and_class_schema.\
                                     Ipython_Task_Controller(schema_str=schema_str)
        IpythonTaskController.initialize_ipengines()
        #IpythonTaskController.spawn_tasks()

        srcid_list = []
        select_str = ('SELECT tcp_srcid FROM ' + self.pars['lookup_tablename'] +
                      ' WHERE tcp_srcid IS NOT NULL AND (class_type LIKE "%rrl%" '
                      'OR class_type LIKE "%EB%" OR class_type LIKE "%epheid%")')

        self.cursor.execute(select_str)
        rows = self.cursor.fetchall()
        for row in rows:
            srcid_list.append(row[0])

        list_incr = 5
        for i_low in xrange(0, len(srcid_list), list_incr):
            short_srcid_list = srcid_list[i_low:i_low + list_incr]
            if 0:
                ### For debugging only:
                import classification_interface
                import plugin_classifier
                import get_classifications_for_ptf_srcid_and_class_schema
                Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str=schema_str)
                Get_Classifications_For_Ptf_Srcid.main(src_id=short_srcid_list[0])
            exec_str = """schema_str="%s"
for src_id in srcid_list:
    try:
        if True:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass # skipping this srcid""" % (schema_str)
            taskid = IpythonTaskController.tc.run(client.StringTask(
                exec_str, push={'srcid_list': short_srcid_list}))


        IpythonTaskController.wait_for_tasks_to_finish()
Example #6
    def main(self):
        """ Main function for Testing.
        """
        # This tests the Multi-engine interface:
        mec = client.MultiEngineClient()
        exec_str = """import os
os.environ['TCP_SEX_BIN']=os.path.expandvars('$HOME/bin/sex')
os.environ['TCP_WCSTOOLS_DIR']=os.path.expandvars('$HOME/src/install/wcstools-3.6.4/bin/')
os.environ['TCP_DIR']=os.path.expandvars('$HOME/src/TCP/')
os.environ['TCP_DATA_DIR']=os.path.expandvars('$HOME/scratch/TCP_scratch/')
os.environ['CLASSPATH']=os.path.expandvars('$HOME/src/install/weka-3-5-7/weka.jar')

        """
        #if os.path.exists(os.path.expandvars("$HOME/.ipython/custom_configs")): execfile(os.path.expandvars("$HOME/.ipython/custom_configs"))
        mec.execute(exec_str)

        # This tests the task client interface:
        tc = client.TaskClient()
        task_list = []

        n_iters_total = 8
        n_iters_per_clear = 10
        for i in xrange(n_iters_total):
            task_str = """cat = os.getpid()"""  # os.getpid() # os.environ
            taskid = tc.run(client.StringTask(task_str, pull="cat"))
            task_list.append(taskid)
            ### NOTE: This can be used to thin down the ipcontroller memory storage of
            ###       finished tasks, but afterwards you cannot retrieve values (below):
            #if (i % n_iters_per_clear == 0):
            #    tc.clear()
        print '!!! NUMBER OF TASKS STILL SCHEDULED: ', tc.queue_status()['scheduled']
        for i, taskid in enumerate(task_list):
            ### NOTE: The following retrieval doesn't work if tc.clear()
            ###       was called earlier:
            task_result = tc.get_task_result(taskid, block=True)
            print task_result['cat']
        print 'done'
        print tc.queue_status()
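The NOTE above is the operative caveat: tc.clear() frees the controller's
record of finished tasks, after which get_task_result() can no longer return
their pulled values. A minimal sketch of the trade-off, assuming a running
controller and engines:

from IPython.kernel import client

tc = client.TaskClient()
tid = tc.run(client.StringTask("x = 1 + 1", pull="x"))
print tc.get_task_result(tid, block=True)['x']  # 2: the result is still stored
tc.clear()  # controller forgets finished tasks, reclaiming memory
# tc.get_task_result(tid) would now fail, so clear only after harvesting.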
Example #7
    def spawn_tasks__crap2(self):
        """ This spawns ipython ipengine tasks

        NOTE: These ipython tasks are intended to be run on a single machine (transx)
        """
        Get_Classifications_For_Ptf_Srcid = GetClassificationsForPtfSrcid(
            schema_str=self.schema_str)
        # This instance is only needed for the next method call (which could
        # arguably live elsewhere and require less initialization):
        total_srcid_list = Get_Classifications_For_Ptf_Srcid.retrieve_ptf_variable_sources()

        # KLUDGE: unfortunately we need to reinitialize taskclient due to memory leaks in a primary class.
        #list_incr = 5
        #for i_low in xrange(0, len(total_srcid_list), list_incr):
        #    short_srcid_list = total_srcid_list[i_low:i_low + list_incr]
        if 1:
            short_srcid_list = total_srcid_list
            exec_str = """schema_str="%s"
import os,sys
import classification_interface
import plugin_classifier
import ptf_master
import MySQLdb
import get_classifications_for_ptf_srcid_and_class_schema
Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str=schema_str)
for src_id in srcid_list:
    try:
        is_already_ingested = Get_Classifications_For_Ptf_Srcid.check_srcid_ingested(src_id, schema_str)
        if not is_already_ingested:
            Get_Classifications_For_Ptf_Srcid.main(src_id=src_id)
    except:
        pass # skipping this srcid
del Get_Classifications_For_Ptf_Srcid""" % (self.schema_str)
            taskid = self.tc.run(client.StringTask(
                exec_str,
                push={'srcid_list': short_srcid_list},
                clear_after=True))
Example #8
task_string = """\
op = MCOptionPricer(S,K,sigma,r,days,paths)
op.run()
vp, ap, vc, ac = op.vanilla_put, op.asian_put, op.vanilla_call, op.asian_call
"""

# Create arrays of strike prices and volatilities
K_vals = N.linspace(90.0, 100.0, 5)
sigma_vals = N.linspace(0.0, 0.2, 5)

# Submit tasks
taskids = []
for K in K_vals:
    for sigma in sigma_vals:
        t = client.StringTask(task_string,
                              push=dict(sigma=sigma, K=K),
                              pull=('vp', 'ap', 'vc', 'ac', 'sigma', 'K'))
        taskids.append(tc.run(t))

print "Submitted tasks: ", taskids

# Block until tasks are completed
tc.barrier(taskids)

# Get the results
results = [tc.get_task_result(tid) for tid in taskids]

# Assemble the result
vc = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
vp = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
ac = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
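The excerpt breaks off mid-assembly: the flat result arrays are allocated but
never filled, and the asian-put array is missing. A plausible completion,
assuming each TaskResult exposes the pulled names on its namespace as in the
other examples here:

ap = N.empty(K_vals.shape[0] * sigma_vals.shape[0], dtype='float64')
for i, result in enumerate(results):
    vp[i] = result.ns.vp
    ap[i] = result.ns.ap
    vc[i] = result.ns.vc
    ac[i] = result.ns.ac
# reshape each flat array into a (K, sigma) grid for plotting
vp = vp.reshape(K_vals.shape[0], sigma_vals.shape[0])  # and likewise ap, vc, ac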
Example #9
#!/usr/bin/env python
# encoding: utf-8

from IPython.kernel import client
import time

tc = client.TaskClient()
mec = client.MultiEngineClient()

mec.execute('import time')

for i in range(24):
    tc.run(client.StringTask('time.sleep(1)'))

for i in range(6):
    time.sleep(1.0)
    print "Queue status (vebose=False)"
    print tc.queue_status()
    
for i in range(24):
    tc.run(client.StringTask('time.sleep(1)'))

for i in range(6):
    time.sleep(1.0)
    print "Queue status (vebose=True)"
    print tc.queue_status(True)

for i in range(12):
    tc.run(client.StringTask('time.sleep(2)'))

print "Queue status (vebose=True)"
Example #10
    def simulate(self):
        ep = self.expParams        
        ep.samplingTime = int(ep.Tsim / (200 * ep.DTsim))
        m = self.models
        r = self.recordings
        
        # Run simulation 
        print 'Running lsms:', datetime.today().strftime('%x %X')
        
        t0=datetime.today()
        
        self.net.reset()
        
        cmd = """
exper.reset()
exper.models.input.resetStimulus(SUD)
exper.run('oneRun', saveData = False)
resp,inp = exper.getOutput()
        """
        
        tc = self.IPcluster.getTaskControllerClient()
        
        tids=[]
        print "Preparing liquid responses for training phase..."
        for i in range(len(m.SudList)):        
            tids.append(tc.run(kernel.StringTask(cmd, pull=['resp','inp'], push=dict(SUD=m.SudList[i]))))

        print len(tids), 'Tasks started...'
        tc.barrier(tids)
        
        m.trainResp = []
        m.trainInp = []
        for tid in range(len(tids)):
            res = tc.get_task_result(tids[tid])
            if None<>res.failure:
                res.failure.printDetailedTraceback()
                res.failure.raiseException()            
            m.trainResp.append(res.results['resp'])
            m.trainInp.append(res.results['inp'])
        print "Done."
        
        print "number of synapses of the first circuit is ", m.trainResp[0].numSynapses
        r.numSynapses = m.trainResp[0].numSynapses
        
        print "Shutting down cluster ..."
        self.IPcluster.stop()
        print "Done"
        
        currEpoch = 0        
        for currPhase in range(len(m.sudListSegments)):
            m.phasePreparations[currPhase]()
            print "Starting phase :", m.sudListSegments[currPhase][0]
            for epochWithinPhase in range(m.sudListSegments[currPhase][1],m.sudListSegments[currPhase][2]):
                if currEpoch % 10 == 0:
                    stdout.write(str(currEpoch))
                else:
                    stdout.write(".")
                            
                m.rewardInput.reset(currEpoch, m.trainResp[currEpoch], m.SudList[currEpoch][2])
                m.readout.diminishWeights()                                            
                self.net.advance(int(ep.trialT / ep.DTsim))            
                currEpoch  += 1
            print "done."
            
        
        t1=datetime.today()
        print '\nDone.', (t1 - t0).seconds, 'sec CPU time for', ep.Tsim, 's simulation time'
        self.expParams.simDuration = (t1 - t0).seconds
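This example adds the error-handling step the earlier snippets skip: each
TaskResult carries a failure attribute (None on success) that can replay the
remote traceback locally. A minimal harvesting loop using that same pattern,
with tc and tids as above:

responses = []
for tid in tids:
    res = tc.get_task_result(tid, block=True)
    if res.failure is not None:
        res.failure.printDetailedTraceback()  # show the engine-side traceback
        res.failure.raiseException()          # re-raise it locally
    responses.append(res.results['resp'])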
Example #11
"""
A Distributed Hello world
Ken Kinder <*****@*****.**>
"""
from IPython.kernel import client

tc = client.TaskClient()
mec = client.MultiEngineClient()

mec.execute('import time')
hello_taskid = tc.run(client.StringTask('time.sleep(3) ; word = "Hello,"', pull='word'))
world_taskid = tc.run(client.StringTask('time.sleep(3) ; word = "World!"', pull='word'))
print "Submitted tasks:", hello_taskid, world_taskid
print tc.get_task_result(hello_taskid, block=True).ns.word, \
      tc.get_task_result(world_taskid, block=True).ns.word
Example #12
    def populate_mysql_with_iterative_classes_for_sources(self, aitc,
                                                          vsrc_xml_fpath_list=[],
                                                          do_nonparallel=False):
        """ Here we actually iteratively add the individual epochs
        for a vosource, classify, and enter into the analysis Mysql table.
        """
        # TODO: Here we retrieve all relevant vosource.xml fpaths from
        #     mysql table

        if len(vsrc_xml_fpath_list) > 0:
            vosource_fpath_list = vsrc_xml_fpath_list
        else:
            select_str = "SELECT fpath FROM %s" % (self.pars['table_name'])
            aitc.cursor.execute(select_str)

            results = aitc.cursor.fetchall()
            vosource_fpath_list = []
            for result in results:
                vosource_fpath_list.append(result[0])
            


        if do_nonparallel:
            import ptf_master
            #special_vosource_fpath_list = []
            #for elem in special_vosource_fpath_list:
            #    try:
            #        vosource_fpath_list.pop(elem)
            #    except:
            #        pass
            #special_vosource_fpath_list.extend(vosource_fpath_list)
            #for i,fpath in enumerate(special_vosource_fpath_list):
            for i,fpath in enumerate(vosource_fpath_list):
                ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False, 
                              case_simulate_ptf_stream_using_vosource=True, 
                              vosource_xml_fpath=fpath,
                              case_poll_for_recent_postgre_table_entries=False,
                              insert_row_into_iterative_class_probs=True)
                print "Done: VOSource %d of %d" % (i, len(vosource_fpath_list))
            return (None, None)

            ##### For debugging using cProfile, kcachegrind, etc:
            #p = cProfile.Profile()
            #p.run("""
#import ptf_master
#for i,fpath in enumerate(%s):
#    ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False, 
#    case_simulate_ptf_stream_using_vosource=True, 
#    vosource_xml_fpath=fpath,
#    case_poll_for_recent_postgre_table_entries=False,
#    insert_row_into_iterative_class_probs=True)""" % (str(vosource_fpath_list[:14])))
            #k = lsprofcalltree.KCacheGrind(p)
            #data = open('/tmp/prof_14.kgrind', 'w+')
            #k.output(data)
            #data.close()
            #sys.exit()


        from ipython1.kernel import client
        tc = client.TaskClient((self.pars['ipython_host_ip'], \
                                self.pars['ipython_taskclient_port']))

        for fpath in vosource_fpath_list:
            exec_str = \
               """ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False, 
                               case_simulate_ptf_stream_using_vosource=True, 
                               vosource_xml_fpath='%s',
                               case_poll_for_recent_postgre_table_entries=False,
                               insert_row_into_iterative_class_probs=True)
               """ % (fpath)

            taskid = tc.run(client.StringTask(exec_str))
            self.running_ingest_tasks.append(taskid)
        #print 'yo' # print tc.get_task_result(self.running_ingest_tasks[243], block=False)
        return (tc, vosource_fpath_list)  # for polling which tasks are still queued and which have finished.
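The return comment describes a polling workflow the excerpt leaves to the
caller. A hedged sketch of what that might look like with this old API,
reusing the queue_status()['scheduled'] counter seen in Example #6 and then
harvesting with blocking get_task_result() calls (`obj` is a hypothetical
instance of this class):

import time

tc, fpath_list = obj.populate_mysql_with_iterative_classes_for_sources(aitc)
while tc.queue_status()['scheduled'] > 0:  # tasks still waiting for engines
    time.sleep(5)
for taskid in obj.running_ingest_tasks:
    result = tc.get_task_result(taskid, block=True)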
Example #13
def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1)
    parser.set_defaults(tmax=60)
    parser.set_defaults(controller='localhost')
    parser.set_defaults(meport=10105)
    parser.set_defaults(tport=10113)

    parser.add_option("-n",
                      type='int',
                      dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t",
                      type='float',
                      dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T",
                      type='float',
                      dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-c",
                      type='string',
                      dest='controller',
                      help='the address of the controller')
    parser.add_option("-p", type='int', dest='meport',
                      help="the port on which the controller listens for "
                           "the MultiEngine/RemoteController client")
    parser.add_option("-P", type='int', dest='tport',
                      help="the port on which the controller listens for "
                           "the TaskClient client")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = client.MultiEngineClient()
    tc = client.TaskClient()
    print tc.task_controller
    rc.block = True
    nengines = len(rc.get_ids())
    rc.execute('from IPython.utils.timing import time')

    # the jobs should take a random time within a range
    times = [
        random.random() * (opts.tmax - opts.tmin) + opts.tmin
        for i in range(opts.n)
    ]
    tasks = [client.StringTask("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print "executing %i tasks, totalling %.1f secs on %i engines" % (
        opts.n, stime, nengines)
    time.sleep(1)
    start = time.time()
    taskids = [tc.run(t) for t in tasks]
    tc.barrier(taskids)
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print "executed %.1f secs in %.1f secs" % (stime, ptime)
    print "%.3fx parallel performance on %i engines" % (scale, nengines)
    print "%.1f%% of theoretical max" % (100 * scale / nengines)
Example #14
    def main_ipython_cluster(self,
                             noisify_attribs=[],
                             ntrees=100,
                             mtry=25,
                             nodesize=5,
                             n_iters=23):
        """ Main() for Debug_Feature_Class_Dependence

        Partially adapted from compare_randforest_classifers.py

do training and crossvalidation on just Debosscher data for spped.
   - parse debosscher arff
   - remove certain features
   - train/test classifier using cross validation
   - store error rates for those removed features
        """
        try:
            from IPython.kernel import client
        except ImportError:
            pass  # NOTE: `client` stays undefined and initialize_mec() below will fail

        tc = self.initialize_mec(client=client)

        result_dict = {}
        new_orig_feat_tups = []

        task_id_list = []
        for feat_name in noisify_attribs:
            tc_exec_str = """
new_orig_feat_tups = ''
task_randint = random.randint(0,1000000000000)
classifier_base_dirpath = os.path.expandvars("$HOME/scratch/debug_feature_classifier_dependence/%d" % (task_randint))
os.system("mkdir -p %s" % (classifier_base_dirpath))
try:
    DebugFeatureClassDependence = debug_feature_classifier_dependence.Debug_Feature_Class_Dependence(pars={'algorithms_dirpath':os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/')})
    out_dict = DebugFeatureClassDependence.get_crossvalid_errors_for_single_arff(arff_fpath=pars['orig_arff_dirpath'],
                                                      noisify_attribs=[feat_name],
                                                      ntrees=ntrees,
                                                      mtry=mtry,
                                                      nodesize=nodesize,
                                                      n_iters=n_iters,
                                                      classifier_base_dirpath=classifier_base_dirpath,
                                                      algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))

    orig_wa = numpy.average(out_dict['means'], weights=out_dict['stds'])
    DebugFeatureClassDependence.load_rpy2_rc(algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))
    out_dict = DebugFeatureClassDependence.get_crossvalid_errors_for_single_arff(arff_fpath=pars['new_arff_dirpath'],
                                                          noisify_attribs=[feat_name],
                                                          ntrees=ntrees,
                                                          mtry=mtry,
                                                          nodesize=nodesize,
                                                          n_iters=n_iters,
                                                          classifier_base_dirpath=classifier_base_dirpath,
                                                          algorithms_dirpath=os.path.abspath(os.environ.get("TCP_DIR") + 'Algorithms/'))
    new_wa = numpy.average(out_dict['means'], weights=out_dict['stds'])
    new_orig_feat_tups = (new_wa - orig_wa, feat_name, numpy.std(out_dict['means']))


except:
    new_orig_feat_tups = str(sys.exc_info())
            """

            taskid = tc.run(
                client.StringTask(
                    tc_exec_str,
                    push={
                        'pars': pars,
                        'feat_name': feat_name,
                        'ntrees': ntrees,
                        'mtry': mtry,
                        'nodesize': nodesize,
                        'n_iters': n_iters
                    },
                    pull='new_orig_feat_tups',
                    retries=3))
            task_id_list.append(taskid)
            if 0:
                ### debug: This inspect.getmembers() only works if the task doesn't fail:
                time.sleep(60)
                temp = tc.get_task_result(taskid, block=False)
                import inspect
                for a, b in inspect.getmembers(temp):
                    print a, b
                out_dict = temp.results.get('new_orig_feat_tups', None)
                import pdb
                pdb.set_trace()
                print
            ######

        new_orig_feat_tups = self.wait_for_task_completion(
            task_id_list=task_id_list, tc=tc)

        new_orig_feat_tups.sort()
        pprint.pprint(new_orig_feat_tups)
        import pdb
        pdb.set_trace()
        print
Example #15
from IPython.kernel import client

tc = client.TaskClient()
rc = client.MultiEngineClient()

rc.push(dict(d=30))

cmd1 = """\
a = 5
b = 10*d
c = a*b*d
"""

t1 = client.StringTask(cmd1, clear_before=False, clear_after=True, pull=['a','b','c'])
tid1 = tc.run(t1)
tr1 = tc.get_task_result(tid1,block=True)
tr1.raise_exception()
print "a, b: ", tr1.ns.a, tr1.ns.b
Example #16
    def spawn_off_arff_line_tasks(self, vosource_xml_dirpath):
        """ This spawns off ipython task clients which take vosource.xml
        fpaths and generate the feature/class structure used to create a
        .arff line.  The task results should be 'pulled' and then inserted
        into a final Weka .arff file.
        """
        ##### For testing:
        skipped_deb_srcids = [
            '12645', '12646', '12649', '12653', '12655', '12656', '12658',
            '12660', '12670', '12675', '12700', '12745', '12766', '12797',
            '12798', '12806', '12841', '12847', '12849', '12850', '12851',
            '12852', '12853', '12854', '12856', '12858', '12861', '12864',
            '12868', '12869', '12870', '12875', '12879', '12882', '12885',
            '12886', '12888', '12890', '12891', '12893', '12895', '12901',
            '12904', '12907', '12909', '12914', '12915', '12921', '12923',
            '12924', '12928', '12930', '12932', '12933', '12934', '12936',
            '12941', '12948', '12950', '12957', '12958', '12960', '12961',
            '12970', '13007', '13024', '13034', '13059', '13076', '13078',
            '13091', '13094', '13119', '13122', '13128', '13156', '13170',
            '13172', '13239', '13242', '13246', '13247', '13261', '13268',
            '13280', '13324', '13333', '13354', '13360', '13362', '13369',
            '13374', '13402', '13418', '13420', '13421', '13423', '13424',
            '13425', '13427', '13429', '13432', '13433', '13439', '13440',
            '13442', '13443', '13444', '13448', '13458', '13462', '13465',
            '13466', '13469', '13471', '13476', '13477', '13478', '13480',
            '13481', '13483', '13484', '13491', '13493', '13495', '13500',
            '13502', '13505', '13511', '13519', '13520', '13521', '13530',
            '13535', '13543', '13544', '13552', '13553', '13560', '13561',
            '13564', '13565', '13571', '13573', '13577', '13580', '13582',
            '13591', '13594', '13596', '13602', '13607', '13608', '13616',
            '13618', '13622', '13623', '13625', '13630', '13632', '13638',
            '13642', '13646', '13647', '13650', '13656', '13657', '13668',
            '13676', '13678', '13680', '13686', '13687', '13689', '13690',
            '13692', '13694', '13695', '13698', '13701', '13703', '13704',
            '13708', '13712', '13716', '13717', '13718', '13719', '13722',
            '13723', '13731', '13733', '13739', '13740', '13743', '13744',
            '13747', '13748', '13750', '13760', '13763', '13774', '13776',
            '13777', '13780', '13782', '13783', '13784', '13786', '13788',
            '13793', '13800', '13804', '13806', '13810', '13814', '13815',
            '13819', '13824', '13826', '13832', '13833', '13838', '13843',
            '13847', '13851', '13854', '13858', '13860', '13869', '13873',
            '13881', '13882', '13885', '13888', '13889', '13890', '13892',
            '13893', '13894', '13896', '13898', '13900', '13906', '13911',
            '13922', '13927', '13928', '13929', '13936', '13938', '13942',
            '13944', '13951', '13955', '13957', '13958', '13959', '13962',
            '13965', '13972', '13974', '13988', '13989', '13996', '13997',
            '13998', '14004', '14006', '14009', '14010', '14017', '14018',
            '14024', '14025', '14028', '14029', '14032', '14035', '14043',
            '14047', '14048', '14051', '14055', '14056', '14065', '14066',
            '14070', '14071', '14072', '14087', '14088', '14089', '14093',
            '14095', '14104', '14108', '14109', '14113', '14117', '14120',
            '14122', '14125', '14129', '14133', '14136', '14137', '14151',
            '14155', '14157', '14163', '14166', '14167', '14168', '14174',
            '14175', '14181', '14182', '14186', '14191', '14194', '14198',
            '14205', '14206', '14216', '14218', '14219', '14225', '14226',
            '14234', '14239', '14243', '14244', '14246', '14247', '14248',
            '14250', '14251', '14255', '14256', '14263', '14269', '14275',
            '14280', '14282'
        ]
        import dotastro_sciclass_tools
        dst = dotastro_sciclass_tools.Dotastro_Sciclass_Tools()
        dst.make_tutor_db_connection()
        #####

        xml_fpath_list = glob.glob(vosource_xml_dirpath + '/*xml')
        # KLUDGE: This can potentially load a lot of xml-strings into memory:
        for xml_fpath in xml_fpath_list:
            fname = xml_fpath[xml_fpath.rfind('/') + 1:xml_fpath.rfind('.')]
            num = fname  # Using the filename rather than the source id seems OK here.
            #srcid_xml_tuple_list.append((num, xml_fpath))

            #task_str = """cat = os.getpid()"""
            #taskid = self.tc.run(client.StringTask(task_str, pull="cat"))
            #time.sleep(1)
            #print self.tc.get_task_result(taskid, block=False).results
            #print 'yo'

            ##### For testing:
            #if "100017522.xml" in xml_fpath:
            #    print "yo"
            if 0:
                import pdb
                pdb.set_trace()
                print
                num_orig_str = str(int(num) - 100000000)
                if num_orig_str in skipped_deb_srcids:
                    #print num_orig_str
                    select_str = "select sources.source_id, sources.project_id, sources.source_name, sources.class_id, sources.pclass_id, project_classes.pclass_name, project_classes.pclass_short_name from Sources join project_classes using (pclass_id) where source_id = %s" % (
                        num_orig_str)
                    dst.cursor.execute(select_str)
                    results = dst.cursor.fetchall()

                    a = arffify.Maker(
                        search=[],
                        skip_class=False,
                        local_xmls=True,
                        convert_class_abrvs_to_names=False,
                        flag_retrieve_class_abrvs_from_TUTOR=False,
                        dorun=False)
                    out_dict = a.generate_arff_line_for_vosourcexml(
                        num=str(num), xml_fpath=xml_fpath)
                    print '!!!', results[0]
                else:
                    try:
                        a = arffify.Maker(
                            search=[],
                            skip_class=False,
                            local_xmls=True,
                            convert_class_abrvs_to_names=False,
                            flag_retrieve_class_abrvs_from_TUTOR=False,
                            dorun=False)
                        out_dict = a.generate_arff_line_for_vosourcexml(
                            num=str(num), xml_fpath=xml_fpath)
                    except:
                        print "barf on some xml:", xml_fpath

        #print xml_fpath
        #continue
        #####

            if 1:
                exec_str = """out_dict = a.generate_arff_line_for_vosourcexml(num="%s", xml_fpath="%s")
                """ % (str(num), xml_fpath)
                #print exec_str
                try:
                    taskid = self.tc.run(client.StringTask(exec_str, \
                                            pull='out_dict', retries=3))
                    self.task_id_list.append(taskid)
                except:
                    # taskid may be unbound if tc.run() itself raised, so don't print it
                    print "EXCEPT!: failed to submit task:", exec_str