def initialize_queue():
    """Create a paired request/response SQS queue with a random id.

    Parses ``-t``/``--timeout`` from ``argv``, creates the
    ``slurm-<id>-request`` and ``slurm-<id>-response`` queues with that
    visibility timeout, enables long polling where supported, and prints the
    queue id as a shell ``export`` line.

    Returns:
        int: 0 on success, 1 on a command-line usage error.
    """
    # 10 random bytes rendered as 20 hex chars -> unique queue id.
    queue_id = "".join(["%02x" % ord(x) for x in urandom(10)])
    request_queue_name = "slurm-%s-request" % queue_id
    response_queue_name = "slurm-%s-response" % queue_id
    timeout = 43200  # default visibility timeout: 12 hours (the SQS maximum)

    def usage():
        # Usage text goes to stderr so stdout stays clean for the export line.
        stderr.write("""\
Usage: %s [--timeout=<timeout in seconds>]
Timeout must be an integer from 0 to 43200 (12 hours).
""" % (argv[0], ))
        return

    try:
        opts, args = getopt(argv[1:], "t:", ["timeout="])
    except GetoptError:
        usage()
        return 1
    if len(args) > 0:
        print("Unknown argument %s" % args[0], file=stderr)
        usage()
        return 1
    for opt, value in opts:
        if opt in ("-t", "--timeout"):
            try:
                timeout = int(value)
                if not (0 <= timeout <= 43200):
                    raise ValueError()
            except ValueError:
                print("Invalid timeout value %r" % value, file=stderr)
                usage()
                return 1

    sqs = get_sqs()
    request_queue = sqs.create_queue(request_queue_name, timeout)
    response_queue = sqs.create_queue(response_queue_name, timeout)
    try:
        # Enable long polling (20 s is the SQS maximum wait time).
        request_queue.set_attribute("ReceiveMessageWaitTimeSeconds", 20)
        response_queue.set_attribute("ReceiveMessageWaitTimeSeconds", 20)
    except Exception:
        # Best-effort: ignore if the backend does not support long polling.
        # (Original bound the exception to an unused name.)
        pass

    # Printed so a caller can `eval` the output into its environment.
    print("export SLURM_EC2_QUEUE_ID=%s" % queue_id)
    return 0
def log_sc_startup( ):
    """Drain the 'starcluster-results' SQS queue and fold the buffered
    stdout/system messages into each server's startup log.

    Returns:
        list: names of the clusters that had messages in the queue.
    """
    sqs = boto.sqs.connect_to_region("us-east-1")
    q = sqs.create_queue('starcluster-results')
    # Read with a 120 s window — presumably the visibility timeout; confirm
    # against boto's Queue.read signature.
    msg = q.read(120)
    # Group messages by "<cluster>-<master>" so each server's log is rebuilt
    # only from its own messages.
    msg_comb = defaultdict(list)
    err = ''  # NOTE(review): assigned but never used in this function
    clusters = set()
    while msg:
        mymsg = json.loads(msg.get_body())
        q.delete_message( msg )
        mymsg_key = mymsg['cluster_name'] + '-' + mymsg['master_name']
        msg_comb[mymsg_key].append(mymsg)
        msg = q.read(120)
    for key, msg_list in msg_comb.iteritems():  # Python 2 dict iteration
        first = msg_list[0]
        # Replay messages in their original emission order.
        msg_list.sort(key=lambda x: x['count'])
        clusters.add( first['cluster_name'] )
        adv_ser = ANServer(first['master_name'], first['cluster_name'],
                           no_create=True)
        log = adv_ser.startup_log
        # NOTE: loop variable shadows the outer `msg` (already exhausted).
        for msg in msg_list:
            if 'time' not in msg:
                # Producer did not timestamp — fall back to "now".
                msg['time'] = datetime.now().isoformat()
            if msg['type'] == 'stdout':
                log += '[%s] %s\n' % (msg['time'], msg['msg'])
            # NOTE(review): stderr handling is deliberately dead code via the
            # constant False guard — confirm intent before re-enabling.
            if False and msg['type'] == 'stderr':
                if msg['msg'][:3] == '>>>':
                    log += msg['msg'] + '\n'
            if msg['type'] == 'system':
                if msg['msg'][:8] == 'Complete':
                    # Visual separator before the completion notice.
                    log += '=' * 60 + '\n'
                log += '[%s] %s\n' % (msg['time'], msg['msg'])
        adv_ser.set_ready()
        adv_ser.set_startup_log( log )
    return list(clusters)
def cluster_terminate( starcluster_bin, url, master_name, cluster_name):
    """Forcefully terminate a StarCluster cluster, streaming the command's
    output to the 'starcluster-results' SQS queue."""
    conn = boto.sqs.connect_to_region("us-east-1")
    results_queue = conn.create_queue('starcluster-results')
    # Base payload attached to every message logged for this invocation.
    status = {
        'cluster_name': cluster_name,
        'master_name': master_name,
        'component': 'restart',
    }
    command = "%s -c %s/%s/%s terminate -f -c %s " % (
        os.path.expanduser(starcluster_bin),
        url,
        master_name,
        cluster_name,
        cluster_name,
    )
    status['command'] = command
    proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    # Relay the subprocess's stdout/stderr into the results queue.
    log_subprocess_messages(proc, results_queue, status)
def setup(image_type):
    """Provision build resources for one image type.

    Derives a unique name, creates the IAM policy and SQS queue, and renders
    the instance user-data from the 'image_data' template.

    Returns:
        tuple: (name, rendered user-data, queue object).
    """
    name = identify('salt-%s' % image_type)
    print(name)
    create_policy(name, 'image_policy.json')
    queue = sqs.create_queue(name)
    # Template substitutions for the user-data script.
    data = user_data('image_data', {
        'region': region(),
        'queue_url': queue.url,
        'image_type': image_type,
    })
    return name, data, queue
def gpu_logserver_daemon(starcluster_bin, url, master_name, cluster_name, action='start'):
    """Start, stop, or query the GPU log-server daemon on a cluster's master
    node via `starcluster sshmaster`, streaming the command's output to the
    'starcluster-results' SQS queue."""
    valid_actions = ['start', 'stop', 'status']
    assert action in valid_actions, "%s is not a valid action for gpu" % action
    status = {
        'cluster_name': cluster_name,
        'master_name': master_name,
        'action': action,
        'component': 'gpu-logserver-daemon',
    }
    conn = boto.sqs.connect_to_region("us-east-1")
    results_queue = conn.create_queue('starcluster-results')
    command = "%s -c %s/%s/%s sshmaster -u sgeadmin %s " % (
        os.path.expanduser(starcluster_bin),
        url,
        master_name,
        cluster_name,
        cluster_name,
    )
    # Remote control-script invocation, one literal per supported action.
    remote_script = {
        'start': "'bash /home/sgeadmin/GPUDirac/scripts/logserver.sh start'",
        'status': "'bash /home/sgeadmin/GPUDirac/scripts/logserver.sh status'",
        'stop': "'bash /home/sgeadmin/GPUDirac/scripts/logserver.sh stop'",
    }
    command += remote_script[action]
    status['command'] = command
    proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    log_subprocess_messages(proc, results_queue, status)
class CloudWatchDaemon(Daemon):
    """Daemon that relays CloudWatch notifications from an SQS queue into the
    alerta messaging bus. (Python 2 code — note the `except X, e` syntax.)"""

    # Default option values; overridden by the registered configuration.
    cloudwatch_opts = {
        'cloudwatch_sqs_region': 'eu-west-1',
        'cloudwatch_sqs_queue': 'cloudwatch-to-alerta',
        'cloudwatch_access_key': '022QF06E7MXBSAMPLE',
        'cloudwatch_secret_key': ''
    }

    def __init__(self, prog, **kwargs):
        config.register_opts(CloudWatchDaemon.cloudwatch_opts)
        # NOTE(review): kwargs is passed as a single positional dict rather
        # than expanded with ** — confirm against Daemon.__init__'s signature.
        Daemon.__init__(self, prog, kwargs)

    def run(self):
        # Main daemon loop setup: metrics, message bus, dedup, then SQS.
        # (The snippet may be truncated; only the setup phase is visible.)
        self.running = True
        self.statsd = StatsD()  # graphite metrics
        # Connect to message queue
        self.mq = Messaging()
        self.mq.connect(callback=CloudWatchMessage(self.mq))
        self.dedup = DeDup(by_value=True)
        LOG.info('Connecting to SQS queue %s', CONF.cloudwatch_sqs_queue)
        try:
            sqs = boto.sqs.connect_to_region(
                CONF.cloudwatch_sqs_region,
                aws_access_key_id=CONF.cloudwatch_access_key,
                aws_secret_access_key=CONF.cloudwatch_secret_key)
        except boto.exception.SQSError, e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)
        try:
            q = sqs.create_queue(CONF.cloudwatch_sqs_queue)
            # Raw messages: the SNS envelope is parsed downstream, not by boto.
            q.set_message_class(RawMessage)
        except boto.exception.SQSError, e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)
def run_sc( starcluster_bin, url, master_name,cluster_name ): adv_ser = ANServer(master_name, cluster_name, no_create=True) pid = multiprocessing.current_process() base_message = {'cluster_name': cluster_name, 'master_name': master_name, 'pid':str(pid) } sqs =boto.sqs.connect_to_region("us-east-1") q = sqs.create_queue('starcluster-results') if adv_ser.active: base_message['type'] = 'system' base_message['msg'] = 'Error: already active' q.write( Message(body=json.dumps(message)) ) return sc_command = "%s -c %s/%s/%s start -c %s %s" %( os.path.expanduser(starcluster_bin), url,master_name, cluster_name, cluster_name, cluster_name) base_message['command'] = sc_command sc_p = subprocess.Popen( sc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True ) adv_ser.set_active() adv_ser.set_startup_pid(str(sc_p.pid)) log_subprocess_messages( sc_p, q, base_message) adv_ser.set_ready() """
# Fragment (Python 2): build-run bootstrap — creates a per-run SQS queue,
# launches the EC2 instance, then polls for 'running' state.
instance_id = None
# Unique run id ties the queue, the instance and the logs of one run together.
run_id = "abbey-{}-{}-{}".format(
    args.environment, args.deployment, int(time.time() * 100))
ec2_args = create_instance_args()
print "{:<40}".format(
    "Creating SQS queue and launching instance for {}:".format(run_id))
print
for k, v in ec2_args.iteritems():
    if k != 'user_data':  # user_data is large; don't echo it
        print " {:<25}{}".format(k, v)
print
sqs_queue = sqs.create_queue(run_id)
sqs_queue.set_message_class(RawMessage)  # raw: payload parsed by the consumer
res = ec2.run_instances(**ec2_args)
inst = res.instances[0]
instance_id = inst.id
print "{:<40}".format("Waiting for running status:"),
status_start = time.time()
# Poll once per iteration up to EC2_RUN_TIMEOUT attempts.
for _ in xrange(EC2_RUN_TIMEOUT):
    res = ec2.get_all_instances(instance_ids=[instance_id])
    if res[0].instances[0].state == 'running':
        status_delta = time.time() - status_start
        run_summary.append(('EC2 Launch', status_delta))
        # NOTE(review): snippet truncated here — the loop's break/else
        # handling is not visible in this view.
        print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
            status_delta / 60, status_delta % 60)
# Fragment (Python 2): variant of the build-run bootstrap, wrapped in a
# try block (its except/finally clauses are outside this view).
try:
    sqs_queue = None
    instance_id = None
    # Unique run id ties queue, instance and logs of one run together.
    run_id = "abbey-{}-{}-{}".format(args.environment, args.deployment, int(time.time() * 100))
    ec2_args = create_instance_args()
    print "{:<40}".format("Creating SQS queue and launching instance for {}:".format(run_id))
    print
    for k, v in ec2_args.iteritems():
        if k != "user_data":  # user_data is large; don't echo it
            print " {:<25}{}".format(k, v)
    print
    sqs_queue = sqs.create_queue(run_id)
    sqs_queue.set_message_class(RawMessage)  # payload parsed by the consumer
    res = ec2.run_instances(**ec2_args)
    inst = res.instances[0]
    instance_id = inst.id
    print "{:<40}".format("Waiting for running status:"),
    status_start = time.time()
    # Poll once per iteration up to EC2_RUN_TIMEOUT attempts.
    for _ in xrange(EC2_RUN_TIMEOUT):
        res = ec2.get_all_instances(instance_ids=[instance_id])
        if res[0].instances[0].state == "running":
            status_delta = time.time() - status_start
            run_summary.append(("EC2 Launch", status_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(status_delta / 60, status_delta % 60)
            break
    # NOTE(review): snippet truncated at the following else clause.
    else:
def launch_and_configure(ec2_args):
    """
    Creates an sqs queue, launches an ec2 instance,
    configures it and creates an AMI. Polls
    SQS for updates

    Reads module-level run_id/sqs/ec2/run_summary/start_time and the
    EC2_RUN_TIMEOUT / EC2_STATUS_TIMEOUT / NUM_TASKS constants; writes the
    module-level sqs_queue and instance_id. Returns (run_summary, ami).
    """
    print "{:<40}".format(
        "Creating SQS queue and launching instance for {}:".format(run_id))
    print
    for k, v in ec2_args.iteritems():
        if k != 'user_data':  # user_data is large; don't echo it
            print " {:<25}{}".format(k, v)
    print
    # Exposed globally so cleanup code elsewhere can tear these down.
    global sqs_queue
    global instance_id
    sqs_queue = sqs.create_queue(run_id)
    sqs_queue.set_message_class(RawMessage)  # payload parsed by the consumer
    res = ec2.run_instances(**ec2_args)
    inst = res.instances[0]
    instance_id = inst.id
    print "{:<40}".format(
        "Waiting for instance {} to reach running status:".format(instance_id)),
    status_start = time.time()
    # Phase 1: poll until the instance reports 'running' (1 s per attempt).
    for _ in xrange(EC2_RUN_TIMEOUT):
        res = ec2.get_all_instances(instance_ids=[instance_id])
        if res[0].instances[0].state == 'running':
            status_delta = time.time() - status_start
            run_summary.append(('EC2 Launch', status_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                status_delta / 60, status_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        # for/else: loop exhausted without a break -> timed out.
        raise Exception("Timeout waiting for running status: {} ".format(
            instance_id))
    print "{:<40}".format("Waiting for system status:"),
    system_start = time.time()
    # Phase 2: wait for EC2 system status checks to pass.
    for _ in xrange(EC2_STATUS_TIMEOUT):
        status = ec2.get_all_instance_status(inst.id)
        if status[0].system_status.status == u'ok':
            system_delta = time.time() - system_start
            run_summary.append(('EC2 Status Checks', system_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
                system_delta / 60, system_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        raise Exception("Timeout waiting for status checks: {} ".format(
            instance_id))
    print
    # Phase 3: follow the Ansible run via messages posted to the SQS queue.
    print "{:<40}".format(
        "Waiting for user-data, polling sqs for Ansible events:")
    (ansible_delta, task_report) = poll_sqs_ansible()
    run_summary.append(('Ansible run', ansible_delta))
    print
    print "{} longest Ansible tasks (seconds):".format(NUM_TASKS)
    for task in sorted(
            task_report, reverse=True, key=lambda k: k['DELTA'])[:NUM_TASKS]:
        print "{:0>3.0f} {}".format(task['DELTA'], task['TASK'])
        print " - {}".format(task['INVOCATION'])
    print
    # Phase 4: bake the configured instance into an AMI.
    print "{:<40}".format("Creating AMI:"),
    ami_start = time.time()
    ami = create_ami(instance_id, run_id, run_id)
    ami_delta = time.time() - ami_start
    print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(
        ami_delta / 60, ami_delta % 60)
    run_summary.append(('AMI Build', ami_delta))
    # Account for any wall-clock time not attributed to a named stage.
    total_time = time.time() - start_time
    all_stages = sum(run[1] for run in run_summary)
    if total_time - all_stages > 0:
        run_summary.append(('Other', total_time - all_stages))
    run_summary.append(('Total', total_time))
    return run_summary, ami
# Fragment: tail of a credential-file parser (getKeys) plus queue bootstrap.
# NOTE(review): the enclosing function header and first branch start before
# this view; indentation reconstructed from the visible tokens.
            # key=value format: accumulate remaining AWSAccessKeyId/AWSSecretKey lines
            parts = hdr.split("=")
            out[parts[0]] = parts[1].strip()
            hdr = inf.readline()
        return {"aws_access_key_id": out["AWSAccessKeyId"],
                "aws_secret_access_key": out["AWSSecretKey"]}
    # Colon format
    elif hdr[0] == "#":
        # Skip leading comment lines, then parse "name: value" pairs.
        while hdr[0] == "#":
            hdr = inf.readline()
        out = dict()
        while hdr:
            parts = hdr.split(":")
            out[parts[0]] = parts[1].strip()
            hdr = inf.readline()
        return {"aws_access_key_id": out["accessKeyId"],
                "aws_secret_access_key": out["secretKey"]}
    # IAM format
    else:
        # CSV: second line holds user, access key, secret key.
        keys = inf.readline().split(",")
        return {"aws_access_key_id": keys[1].strip(),
                "aws_secret_access_key": keys[2].strip()}


keys = getKeys("rootkey.csv")
region = "us-west-2"
queue_name = "test"
if not queue_name:
    raise Exception("You must set a queue name.")
sqs = boto.sqs.connect_to_region(region, **keys)
# Reuse the queue if it already exists; otherwise create it.
queue = sqs.get_queue(queue_name) or sqs.create_queue(queue_name)
# Fragment: Flask application bootstrap with SQS/SNS connections.
import cred_conf
import boto.sqs, boto.sns
from boto.sqs.message import Message
import json
import time
import logging

FORMAT = r'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(stream=sys.stderr, level=logging.INFO, format=FORMAT)

# NOTE(review): `cred_aws`, `sys`, `Flask`, `SQLAlchemy` and `cred_db` are
# referenced but not imported in this view — confirm they are provided by
# lines outside this fragment (or that `cred_conf` vs `cred_aws` is a typo).
sqs = boto.sqs.connect_to_region(
    "us-east-1",
    aws_access_key_id=cred_aws.aws_access_key_id,
    aws_secret_access_key=cred_aws.aws_secret_access_key)
# Reuse the queue if it already exists; otherwise create it.
my_queue = sqs.get_queue('myqueue') or sqs.create_queue('myqueue')
sns = boto.sns.connect_to_region(
    "us-east-1",
    aws_access_key_id=cred_aws.aws_access_key_id,
    aws_secret_access_key=cred_aws.aws_secret_access_key)
topicarn = cred_aws.aws_sns_topicarn

# Flask app object
application = app = Flask(__name__)
app.config.from_object(cred_conf)
app.config['SQLALCHEMY_DATABASE_URI'] = cred_db.SQLALCHEMY_DATABASE_URI
daemon = None

# Database connection
db = SQLAlchemy(app)
#!/usr/bin/env python
"""Read dump1090 lines from stdin and forward them to SQS in batches of 10."""
import sys
import boto.sqs
import os

# BUG FIX: original read `os.getenv('AWS_DEFAULT_REGION'], 'us-east-1')` —
# a stray ']' made this a SyntaxError; the fallback belongs inside getenv().
sqs = boto.sqs.connect_to_region(os.getenv('AWS_DEFAULT_REGION', 'us-east-1'))
queue = sqs.create_queue(os.getenv('SQS_QUEUE', 'dump1090'))

messages = list()
batch = list()
for line in sys.stdin:
    messages.append(line.rstrip())
    if len(messages) == 10:
        # print ';'.join(messages)
        # write_batch takes (id, body, delay_seconds) triples; ids need only
        # be unique within the batch.
        for i, message in enumerate(messages):
            batch.append((i + 1, message, 0))
        queue.write_batch(batch)
        batch = list()
        messages = list()
# NOTE(review): a final partial batch (< 10 lines at EOF) is dropped — this
# matches the original behavior; confirm whether a flush is wanted.
        # Fragment: tail of an archive-manifest upload method (its def is
        # outside this view) followed by an ad-hoc __main__ smoke test.
        temp.seek(0)
        key = Key(self.bucket)
        # Manifest key is namespaced under self._path when one is configured.
        if self._path:
            key.key = '%s/%s.manifest.json' % (
                self._path, self.archive_hash)
        else:
            key.key = '%s.manifest.json' % self.archive_hash
        key.set_contents_from_file( temp )
        # Record the archive in the run model so it can be located later.
        run_mdl.insert_ANRunArchive( self.run_id, self.archive_hash,
            self._arch_ctr,
            bucket = self._bucket_name,
            archive_manifest = '%s.manifest.json' % self.archive_hash,
            path = self._path,
            truth = self._truth)


if __name__ == "__main__":
    # Ad-hoc smoke test: archive up to 20 result sets pulled from SQS.
    sqs = boto.connect_sqs()
    d2a = sqs.create_queue( 'from-data-to-agg-b6-canonical-q92-bak' )
    archive = S3ResultSetArchive('this-is-a-test-run-id', 'an-scratch-bucket',
                                 path="S3ResultSetArchiveTest3",
                                 num_result_sets=9 )
    ctr = 0
    for i in range(2):
        messages = d2a.get_messages(10)
        for message in messages:
            ctr += 1
            instructions = json.loads( message.get_body() )
            rs = S3ResultSet(instructions, 'an-from-gpu-to-agg-b6-canonical-q92')
            # NOTE(review): the triple-quoted string below disables the debug
            # prints; the snippet is truncated inside it.
            """
            print "rs.nsamp"
            print rs.nsamp
            print "rs.file_id"
            print rs.file_id
            print "rs.nnets"
def deprecated_run_once(comm, mask_id, sqs_data_to_agg, sqs_truth_to_agg,
        sqs_recycling_to_agg, s3_from_gpu, s3_results, run_truth_table,
        s3_csvs ):
    """One aggregation pass over GPU result sets, MPI-parallel (deprecated).

    Rank 0 decides (from queue depths) whether to run from the primary or the
    recycling queue, processes the "truth" results, then all ranks aggregate
    and Reduce their accuracy accumulators; rank 0 writes p-value CSVs.

    NOTE(review): the collective calls (bcast/Barrier/Reduce/reduce) must stay
    in this exact order on every rank — do not reorder.
    """
    by_network = True
    rec = None
    if comm.rank == 0:
        # Choose the fuller queue as the source; swap below if recycling wins.
        sqs = boto.connect_sqs()
        d2a = sqs.create_queue( sqs_data_to_agg )
        d2a_bak = sqs.create_queue( sqs_recycling_to_agg )
        print "Num data %i in %s" % (d2a.count(), sqs_data_to_agg)
        print "Num data %i in %s" % (d2a_bak.count(), sqs_recycling_to_agg)
        if d2a.count() > d2a_bak.count():
            rec = False
        else:
            assert d2a_bak.count() > 0, "both queues empty"
            rec = True
    rec = comm.bcast(rec)
    if rec:
        # Recycle: read from the recycling queue, write back to the primary.
        sqs_data_to_agg, sqs_recycling_to_agg = sqs_recycling_to_agg, sqs_data_to_agg
    if comm.rank == 0:
        # Rank 0 alone consumes the truth queue first.
        print "I want the truth!!!"
        a = Truthiness( sqs_truth_to_agg, sqs_truth_to_agg, s3_from_gpu,
                        s3_results, run_truth_table, by_network, mask_id)
        rs = a.get_result_set()
        if rs:
            while not a.handle_result_set(rs):
                # NOTE(review): `ctr` is printed here before it is assigned
                # below — NameError if this branch runs; left as-is
                # (function is deprecated).
                print "not the truth", ctr
                rs = a.get_result_set()
                if rs is None:
                    break
    comm.Barrier()
    #print "Aggregating", mask_id, sqs_data_to_agg, sqs_truth_to_agg, sqs_recycling_to_agg, s3_from_gpu, s3_results, run_truth_table, s3_csvs
    a = Aggregator( sqs_data_to_agg, sqs_recycling_to_agg, s3_from_gpu,
                    s3_results, run_truth_table, by_network, mask_id)
    rs = a.get_result_set()
    if comm.rank == 0:
        # Remember run id / spec string for the Dynamo update at the end.
        rid = rs.get_run_id()
        st = rs.spec_string
    ctr = 0
    # Drain the queue: every rank aggregates result sets until exhausted.
    while rs:
        ctr += 1
        a.handle_result_set(rs)
        rs = a.get_result_set()
    comm.Barrier()
    acc_pre = "acc-k-11-%i-%i" % (ctr, comm.rank)
    a.save_acc( '/scratch/sgeadmin', acc_pre)
    strains = a.acc_acc.keys()  # Python 2: keys() returns a list
    strains.sort()
    # All ranks must iterate the same strain order for the Reduces to line up.
    strains = comm.bcast(strains)
    zero = None
    for mat in a.acc_acc.itervalues():
        # Template zero matrix for strains this rank has no data for.
        zero = np.zeros_like(mat, dtype = np.int)
    for k in strains:
        if k in a.acc_acc:
            curr = a.acc_acc[k]
        else:
            curr = zero
        total = np.zeros_like(curr)
        # Element-wise sum of accuracy matrices across ranks (to rank 0).
        comm.Reduce([curr, MPI.INT], [total, MPI.INT])
        if comm.rank == 0:
            a.acc_acc[k] = total
        total_count = 0
        print "acc", a.acc_count[k]
        total_count = comm.reduce(a.acc_count[k])
        if comm.rank == 0:
            print "total obs. %i" % total_count
            # Convert summed counts into a p-value table and persist it.
            divisor = float(total_count)
            pv_table = a.acc_acc[k]/divisor
            file_loc = '/scratch/sgeadmin/pvals-%s-%s-%s.csv' % (
                a.run_config['run_settings']['k'], a.run_id, mask_id)
            a.generate_csv( pv_table, column_names = a.get_mask_labels(),
                            index=a.networks, filename=file_loc)
            a.write_csv(s3_csvs, file_loc)
            try:
                # Best-effort: attach the p-value file to the truth record.
                res = TruthGPUDiracModel.query(rid, strain_id__eq=st)
                for r in res:
                    r.pval_file = os.path.split(file_loc)[1]
                    r.mask = a.get_mask_labels()
                    r.save()
            except Exception as e:
                print "Unable to store in dynamo"
                print "%r" % e
    if comm.rank==0:
        a.save_acc( '/scratch/sgeadmin', 'acc-k-11-combined-total' )
    comm.Barrier()
#!/usr/bin/env python import sys, asyncore, asynchat, time, socket, os, boto, boto.ec2, boto.sqs from multiprocessing import Process, Pool, Manager from boto.sqs.message import Message data = {} joblist = {} m = Manager() workqueue = m.Queue() resultqueue = m.Queue() sqs = boto.sqs.connect_to_region("us-west-2") requests = sqs.create_queue('requests') responses = sqs.create_queue('responses') ec2 = boto.ec2.connect_to_region("us-west-2") def watchresult(s): while True: if not resultqueue.empty(): #print "returning" msg = resultqueue.get() resultqueue.task_done() s.sendall("DONE "+msg+"\r\n") #remote worker code rs = responses.get_messages() if len(rs) > 0: m = rs[0] s.sendall("DONE "+m.get_body()+"\r\n") responses.delete_message(m)
# Fragment: Twitter stream -> SQS bridge (listener truncated at the end).
from shapely.geometry import Polygon
import time
import traceback
import json
import boto.sqs
from boto.sqs.message import Message
import traceback  # NOTE(review): duplicate import of traceback

# apikey = ""
# apisecret = ""
# accesstoken = ""
# accesssecret = ""
# SECURITY NOTE(review): execfile() runs creds.py as arbitrary code — the
# file must be trusted and not writable by other users.
execfile("creds.py")

sqs = boto.sqs.connect_to_region("us-west-2")
myQueue = sqs.create_queue("cloud_pr2_hp")
begtime = time.time()


class MyStreamListener(tweepy.StreamListener):
    def on_status(self, status):
        # Only geo-tagged tweets (those carrying a `place`) are processed.
        if status.place:
            author = status.author.name
            text = status.text
            tweetId = status.id
            # Approximate the tweet's location by the centroid of its
            # place's bounding box.
            lt = [tuple(l) for l in status.place.bounding_box.coordinates[0]]
            polygon = Polygon(lt)
            lat = polygon.centroid.y
            lon = polygon.centroid.x
            # NOTE(review): snippet truncated after building this payload.
            tweetdict = dict(author=author, status=text, tweetId=tweetId,
                             longitude=lon, latitude=lat)
    # Fragment: tail of a credential-file parser (single-quote variant of the
    # fragment above in this file); the function header is outside this view.
    # Colon format: skip leading comment lines, then parse "name: value".
    elif hdr[0] == '#':
        while hdr[0] == '#':
            hdr = inf.readline()
        out = dict()
        while hdr:
            parts = hdr.split(':')
            out[parts[0]] = parts[1].strip()
            hdr = inf.readline()
        return {
            'aws_access_key_id': out['accessKeyId'],
            'aws_secret_access_key': out['secretKey']
        }
    # IAM format
    else:
        # CSV: second line holds user, access key, secret key.
        keys = inf.readline().split(',')
        return {
            'aws_access_key_id': keys[1].strip(),
            'aws_secret_access_key': keys[2].strip()
        }


keys = getKeys('rootkey.csv')
region = 'us-west-2'
queue_name = "test"
if not queue_name:
    raise Exception('You must set a queue name.')
sqs = boto.sqs.connect_to_region(region, **keys)
# Reuse the queue if it already exists; otherwise create it.
queue = sqs.get_queue(queue_name) or sqs.create_queue(queue_name)
__author__ = 'mhoyer'

import boto.sqs
from boto.sqs.message import Message

# Smoke test: round-trip one message through a throwaway SQS queue.
sqs = boto.sqs.connect_to_region("eu-west-1")
testqueue = sqs.create_queue('testqueue')

# Publish a single message.
message = Message(body="My first test message")
testqueue.write(message)

# Fetch it back and show its payload.
messages = testqueue.get_messages()
print(messages[0].get_body())

# Tear the queue down again.
sqs.delete_queue(testqueue)
def launch_and_configure(ec2_args):
    """
    Creates an sqs queue, launches an ec2 instance,
    configures it and creates an AMI. Polls
    SQS for updates

    Reads module-level run_id/sqs/ec2/run_summary/start_time and the
    EC2_RUN_TIMEOUT / EC2_STATUS_TIMEOUT / NUM_TASKS constants; writes the
    module-level sqs_queue and instance_id. Returns (run_summary, ami).
    """
    print "{:<40}".format(
        "Creating SQS queue and launching instance for {}:".format(run_id))
    print
    for k, v in ec2_args.iteritems():
        if k != 'user_data':  # user_data is large; don't echo it
            print " {:<25}{}".format(k, v)
    print
    # Exposed globally so cleanup code elsewhere can tear these down.
    global sqs_queue
    global instance_id
    sqs_queue = sqs.create_queue(run_id)
    sqs_queue.set_message_class(RawMessage)  # payload parsed by the consumer
    res = ec2.run_instances(**ec2_args)
    inst = res.instances[0]
    instance_id = inst.id
    print "{:<40}".format(
        "Waiting for instance {} to reach running status:".format(
            instance_id)),
    status_start = time.time()
    # Phase 1: poll until the instance reports 'running' (1 s per attempt).
    for _ in xrange(EC2_RUN_TIMEOUT):
        res = ec2.get_all_instances(instance_ids=[instance_id])
        if res[0].instances[0].state == 'running':
            status_delta = time.time() - status_start
            run_summary.append(('EC2 Launch', status_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(status_delta / 60,
                                                      status_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        # for/else: loop exhausted without a break -> timed out.
        raise Exception(
            "Timeout waiting for running status: {} ".format(instance_id))
    print "{:<40}".format("Waiting for system status:"),
    system_start = time.time()
    # Phase 2: wait for EC2 system status checks to pass.
    for _ in xrange(EC2_STATUS_TIMEOUT):
        status = ec2.get_all_instance_status(inst.id)
        if status[0].system_status.status == u'ok':
            system_delta = time.time() - system_start
            run_summary.append(('EC2 Status Checks', system_delta))
            print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(system_delta / 60,
                                                      system_delta % 60)
            break
        else:
            time.sleep(1)
    else:
        raise Exception(
            "Timeout waiting for status checks: {} ".format(instance_id))
    print
    # Phase 3: follow the Ansible run via messages posted to the SQS queue.
    print "{:<40}".format(
        "Waiting for user-data, polling sqs for Ansible events:")
    (ansible_delta, task_report) = poll_sqs_ansible()
    run_summary.append(('Ansible run', ansible_delta))
    print
    print "{} longest Ansible tasks (seconds):".format(NUM_TASKS)
    for task in sorted(task_report, reverse=True,
                       key=lambda k: k['DELTA'])[:NUM_TASKS]:
        print "{:0>3.0f} {}".format(task['DELTA'], task['TASK'])
        print " - {}".format(task['INVOCATION'])
    print
    # Phase 4: bake the configured instance into an AMI.
    print "{:<40}".format("Creating AMI:"),
    ami_start = time.time()
    ami = create_ami(instance_id, run_id, run_id)
    ami_delta = time.time() - ami_start
    print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(ami_delta / 60, ami_delta % 60)
    run_summary.append(('AMI Build', ami_delta))
    # Account for any wall-clock time not attributed to a named stage.
    total_time = time.time() - start_time
    all_stages = sum(run[1] for run in run_summary)
    if total_time - all_stages > 0:
        run_summary.append(('Other', total_time - all_stages))
    run_summary.append(('Total', total_time))
    return run_summary, ami
# Script ############################################################################ #global variables and constants seed_list = ["Boat"] URL_ROOT = "" LINKS_ROOT = "" LOCAL_TEST = False archive = dict() status = dict() last_archive_time = datetime.datetime.now() sqs = boto.sqs.connect_to_region("us-east-1") #returns the existing queue if one exists already q = sqs.create_queue("wikiloteca_queue") s3 = S3Connection() bucket = s3.get_bucket("nickgtyson.wikiloteca") # title -> status, difficulty, time database = Table("wikiloteca") init("english", seed_list) articles_processed = 0 while (articles_processed < 3): article = choose_article() words = process_article(article) process_links(article)