def populate_cities(ncities, testing_prefix="", testing=False):
    """Reset the task queue and enqueue one 'greencities' task per city.

    Args:
        ncities: Number of cities to request from ``greenspace.get_cities``
            (or, in testing mode, the number of placeholder entries to make).
        testing_prefix: Passed to ``taskmanager.TaskManager`` as ``prefix``.
        testing: When true, enqueue ``[FAKE_CITY]`` placeholders without a
            target cluster instead of real city records.

    Returns:
        Whatever ``manager.get_size()`` reports after all tasks were added.
    """
    print("Running with " + str(ncities) + " cities to process")

    greenspace.init()

    if testing:
        # Placeholder records only; the real lookup would be discarded anyway,
        # so it is not performed in testing mode.
        cities = [[FAKE_CITY] for _ in range(ncities)]
    else:
        # The world is broken into 4 'regions'; the loop over all of them is
        # disabled, so only region 1 is queried.
        region_num = 1
        cities = list(greenspace.get_cities(region_num, ncities))

    # NOTE(review): the original indentation was lost; this debug print is
    # assumed to run in both modes -- confirm against the upstream file.
    print(cities)

    manager = taskmanager.TaskManager(prefix=testing_prefix)
    manager.reset()

    for i, city in enumerate(cities):
        # Compact separators keep the serialized payload free of whitespace.
        job = json.dumps(city, separators=(',', ':'))
        task = {'task': 'greencities', 'data': job}
        if testing:
            manager.add_task(task)
        else:
            clust = decide_cluster(i, ncities)
            # Single %-formatted argument prints the same text as the
            # comma-separated Python 2 print statement.
            print(">> Enqueue %s on %s" % (city[1], taskmanager._sites[clust]))
            manager.add_task(task, clust)

    return manager.get_size()
def mapper_init(x,y): print "Mapper Init" greenspace.init() print "Mapper Init Done" if __name__ == '__main__': parser = optparse.OptionParser() parser.add_option("-c", "--num_cities", dest="num_cities", type="int", default=2, help="number of cities to run the calculation on") parser.add_option("-m", "--num_mappers", dest="num_mappers", type="int", default=4, help="number of nodes to map the job to") (options, args_not_used) = parser.parse_args() print "Running with "+str(options.num_cities)+" cities and "+str(options.num_mappers)+" mappers" here = os.path.dirname(os.path.realpath(__file__))+"/" greenspace.init() cities = greenspace.get_cities(1, options.num_cities) input_files = [] fds = [] for n in xrange(options.num_mappers): fid,name = tempfile.mkstemp(dir=".") print "Temp data file", fid, name f = os.fdopen(fid, "w+b") fds.append(f) input_files.append(name) for i,c in enumerate(cities): fd = fds[i%len(input_files)] json.dump(c,fd ,separators=(',',':')) fd.write("\n") print "City", c[1], "to",fd for f in fds: