Example #1
            q_msg.change_visibility(15)
            q_msg = sqs_queue.read(5)
            continue

      if q_msg != None:
         # This message will be the status update
         status_classad = msg.class_ad
         s3_key = msg.s3_key

         # Add the S3 Key ID if it isn't there already.  This happens
         # in the case where nothing needed to be transferred to
         # the execute node
         if s3_key != None and ad_s3key == '':
            update_classad += 'S3KeyID = "%s"\n' % s3_key
         else:
            key = grep('^S3KeyID\s*=\s*"(.*)"$', status_classad)
            if s3_key == None and ad_s3key == '' and \
               key != None and key[0] != None:
               update_classad += 'S3KeyID = "%s"\n' % key[0]

         # Look for EC2 job parameters
         for param in ['EC2HookArg', 'EC2LastFailureReason']:
            value = grep('^(%s.*)' % param, status_classad)
            if value != None and value[0] != None:
               update_classad += '%s\n' % value[0]

         # If the message notifies of a run attempt, increment the
         # run-attempt counter
         run_try = grep('^EC2JobAttempted\s*=\s*(.*)$', status_classad)
         if run_try != None and run_try[0] != None and \
            run_try[0].strip().lower() == 'true':
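
Every snippet here leans on a grep() helper that is never shown. A minimal sketch of what it presumably looks like, assuming it wraps re.search and returns the capture groups of the first match (re.MULTILINE matters because callers anchor '^...$' patterns against entire multi-line class ads):

import re

def grep(pattern, text):
   # Return the tuple of capture groups from the first match of
   # 'pattern' in 'text', or None if nothing matched.  MULTILINE lets
   # '^' and '$' anchor individual lines of a class ad.
   match = re.search(pattern, text, re.MULTILINE)
   if match is not None:
      return match.groups()
   return None

Callers such as grep('^"(.*)"$', value) then index the result with [0] after checking it against None, exactly as the snippets above and below do.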
Example #2
def main(argv=None):
   if argv == None:
      argv = sys.argv

   status_classad = ''
   aws_key = ''
   aws_secret = ''
   queue_name = ''
   update_classad = ''
   ad_s3key = ''
   job_completed = 'FALSE'
   update_skip_attribs = ['jobstatus', 'imagesize', 'enteredcurrentstatus',
                          'jobstartdate']
   attempts = 0
   region = ''

   for line in sys.stdin:
      match = grep('^([^=]*)\s*=\s*(.*)$', line.strip())
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip().lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if attribute == 'amazonaccesskey':
            aws_key = value
            continue
         if attribute == 'amazonsecretkey':
            aws_secret = value
            continue
         if attribute == 'ec2jobsuccessful':
            job_completed = value
            continue
         if attribute == 'amazonfullsqsqueuename':
            queue_name = value
            continue
         if attribute == 'ec2runattempts':
            attempts = int(value)
            continue
         if attribute == 's3keyid':
            ad_s3key = value
            continue
         if attribute == 'ec2region':
            region = value

   # Get the specified Amazon key information
   if os.path.exists(aws_key) == False or os.path.exists(aws_secret) == False:
      sys.stderr.write('Error: Unable to read AWS key files\n')
      return(FAILURE)
   else:
      key_file = open(aws_key, 'r')
      aws_key_val = key_file.readlines()[0].strip()
      key_file.close()
      key_file = open(aws_secret, 'r')
      aws_secret_val = key_file.readlines()[0].strip()
      key_file.close()

   # Look for an update
   try:
      r_obj = AWSRegion.get_sqs_region(region)
      sqs_con = SQSConnection(aws_key_val, aws_secret_val, region=r_obj)
   except BotoServerError, error:
      sys.stderr.write('Error: Unable to connect to SQS: %s, %s\n' % (error.reason, error.body))
      return(FAILURE)
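
The key-file block above (open, readlines()[0], close) recurs in almost every example; a hypothetical helper that does the same thing while guarding against unreadable files:

def read_key_file(path):
   # Return the first line of the key file with whitespace stripped,
   # or None if the file cannot be opened
   try:
      key_file = open(path, 'r')
      try:
         return key_file.readline().strip()
      finally:
         key_file.close()
   except IOError:
      return None

With it, aws_key_val = read_key_file(aws_key) replaces the four-line open/readlines/close sequence, and a None result maps onto the existing 'Unable to read AWS key files' error path.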
Example #3
def main(argv=None):
   if argv == None:
      argv = sys.argv

   aws_key = ''
   aws_secret = ''
   s3_bucket_obj = ''
   stdout = ''
   stderr = ''
   ec2_success = "false"
   ret_val = SUCCESS
   cluster = 0
   proc = 0
   done_classad = ''
   s3_key = ''
   remove_attrs = ['hookkeyword']
   region = ''
   bucket = ''
   queue_name = ''

   # Read the source class ad from stdin and store it along with the
   # job status.  The end of the source ad is marked by '------'
   for line in sys.stdin:
      if line.strip() == '------':
         break
      match = grep('^([^=]*)\s+=\s+(.*)$', line.strip())
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip().lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if attribute == 'iwd':
            iwd = value
            continue
         if attribute == 'out' and value.lower() != '/dev/null':
            stdout = value
            continue
         if attribute == 'err' and value.lower() != '/dev/null':
            stderr = value
            continue
         if attribute == 'clusterid':
            cluster = value
            continue
         if attribute == 'procid':
            proc = value
            continue

   # Read the routed class ad from stdin and store the S3 information and
   # the job status
   for line in sys.stdin:
      match = grep('^([^=]*)\s*=\s*(.*)$', line.strip())
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip().lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if attribute == 's3bucketid':
            bucket = value
            continue
         if attribute == 's3keyid':
            s3_key = value
            continue
         if attribute == 'amazonaccesskey':
            aws_key = value
            continue
         if attribute == 'amazonsecretkey':
            aws_secret = value
            continue
         if attribute == 'ec2jobsuccessful':
            ec2_success = value
            continue
         if attribute == 'amazonfullsqsqueuename':
            queue_name = value
            continue
         if attribute == 'ec2region':
            region = value

   # If the source job is not in the completed state but the routed job
   # is, then there was a failure running the AMI.  Exit with status 2
   # so the job will be re-routed.
   if ec2_success.lower() == "false":
      sys.stderr.write('Job %d.%d did not complete.  Forcing the job to be routed again\n' % (int(cluster), int(proc)))
      return(FAILURE)

   # Pull the specific keys out of the files
   if os.path.exists(aws_key) == False or os.path.exists(aws_secret) == False:
      sys.stderr.write('Error: Unable to read AWS key files\n')
      return(FAILURE)
   else:
      key_file = open(aws_key, 'r')
      aws_key_val = key_file.readlines()[0].strip()
      key_file.close()
      key_file = open(aws_secret, 'r')
      aws_secret_val = key_file.readlines()[0].strip()
      key_file.close()

   # Connect to S3
   failed = 1
   for attempt in range(1,5):
      try:
         s3_con = S3Connection(aws_key_val, aws_secret_val)
         s3_bucket_obj = s3_con.get_bucket(bucket)
         failed = 0
         break
      except BotoServerError, error:
         sys.stderr.write('Error accessing S3: %s, %s\n' % (error.reason, error.body))
         time.sleep(5)
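
The retry loop above makes four attempts (range(1, 5)), and leaves failed set to 1 if all of them raise; the snippet is cut off before that check, but the same logic can be phrased as a self-contained function. A sketch, with hypothetical names, using the boto imports the examples already rely on:

import sys
import time
from boto.s3.connection import S3Connection
from boto.exception import BotoServerError

def connect_s3_bucket(key_val, secret_val, bucket_name, tries=4, wait=5):
   # Retry the S3 connection a few times before giving up; returns the
   # bucket object, or None if every attempt failed
   for attempt in range(tries):
      try:
         s3_con = S3Connection(key_val, secret_val)
         return s3_con.get_bucket(bucket_name)
      except BotoServerError, error:
         sys.stderr.write('Error accessing S3: %s, %s\n' % (error.reason, error.body))
         time.sleep(wait)
   return None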
Example #4
      q_msg = sqs_queue.read(5)
      while q_msg != None:
         try:
            msg = pickle.loads(q_msg.get_body())
         except:
            # Likely bad message in the queue so skip it by setting the
            # visibility timer far enough in the future that we're unlikely
            # to hit it again this pass but not so far that it won't be seen
            # for a long time, and then move on to the next message
            q_msg.change_visibility(15)
            q_msg = sqs_queue.read(5)
            continue

         # Check the job status to see if this message notifies of
         # job completion
         job_status = grep('^JobStatus\s*=\s*(.)$', msg.class_ad)
         if job_status != None and job_status[0] != None and \
            int(job_status[0].strip()) == 4:
            # We found the update that indicates the job completed.  This
            # message is the update to the source job
            done_classad = msg.class_ad
            break
         else:
            q_msg = sqs_queue.read(5)

   if done_classad != '':
      # Remove attributes that shouldn't be updated
      for rm_attr in remove_attrs:
         for line in done_classad.split('\n'):
            match = grep('^([^=]*)\s*=\s*(.*)$', line.strip())
            if match != None and match[0] != None and match[1] != None:
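
The polling pattern in this example — read, unpickle, bump the visibility timeout on poison messages, stop when JobStatus reaches 4 — can be captured in one function. A sketch, assuming the grep() helper sketched earlier and the boto queue read/change_visibility calls used above:

import pickle

def wait_for_completed_ad(sqs_queue, timeout=5):
   # Drain the queue looking for the update whose JobStatus is 4
   # (completed); return that class ad, or '' if the queue empties first
   q_msg = sqs_queue.read(timeout)
   while q_msg is not None:
      try:
         msg = pickle.loads(q_msg.get_body())
      except Exception:
         # Likely a bad message: push its visibility out and move on
         q_msg.change_visibility(15)
      else:
         status = grep('^JobStatus\s*=\s*(.)$', msg.class_ad)
         if status is not None and status[0] is not None and \
            int(status[0].strip()) == 4:
            return msg.class_ad
      q_msg = sqs_queue.read(timeout)
   return ''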
Example #5
def main(argv=None):
   if argv == None:
      argv = sys.argv

   max_key_id = 1000000000
   sqs_data = SQSEntry()
   grid_classad = ''
   iwd = ''
   create_sandbox = 'no'
   transfer_exe = 'true'
   skip_attribs = ['clusterid', 'procid', 'bufferblocksize', 'buffersize',
                   'condorplatform', 'condorversion', 'coresize',
                   'qdate', 'remotewallclocktime', 'servertime',
                   'autoclusterid', 'autoclusterattrs', 'currenthosts', 
                   'routedtojobid', 'managed', 'managedmanager', 'periodichold',
                   'periodicremove', 'periodicrelease']
   int_reset_attribs = ['exitstatus', 'completiondate', 'localsyscpu',
                        'localusercpu', 'numckpts', 'numrestarts',
                        'numsystemholds', 'committedtime', 'totalsuspensions',
                        'lastsuspensiontime','cumulativesuspensiontime']
   float_reset_attribs = ['remoteusercpu', 'remotesyscpu']
   transfer_attribs = ['cmd', 'command', 'in', 'transferinput']
   delim = '------'
   aws_key = ''
   aws_secret = ''
   aws_public_key = ''
   aws_private_key = ''
   bucket_id = ''
   rsa_public_key = ''
   proc_id = ''
   cluster_id = ''
   qdate = ''
   delay = ''
   s3_key = ''
   route_name = ''
   ami = ''
   instance = ''
   resource_url = 'https://ec2.amazonaws.com/'
   region = ''

   # Parse the route information from stdin.
   route = grep('^\[\s*(.*)\s*\]$', sys.stdin.readline())[0]
   for line in route.split(';'):
      match = grep('^([^=]*)\s*=\s*(.*)$', line.strip())
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip().lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if attribute == 'name':
            route_name = value
            continue
         if attribute == 'set_amazonpublickey':
            aws_public_key = value
            continue
         if attribute == 'set_amazonprivatekey':
            aws_private_key = value
            continue
         if attribute == 'set_amazonaccesskey':
            aws_key = value
            continue
         if attribute == 'set_amazonsecretkey':
            aws_secret = value
            continue
         if attribute == 'set_amazons3bucketname':
            bucket_id = value
            continue
         if attribute == 'set_rsapublickey':
            rsa_public_key = value
            continue
         if attribute == 'set_amazonamishutdowndelay':
            delay = value
            continue
         if attribute == 'set_amazonamiid':
            ami = value
            continue
         if attribute == 'set_amazoninstancetype':
            instance = value
            continue
         if attribute == 'set_ec2region':
            region = value
            continue

   # Read the original class ad from stdin and store it for submission
   # to SQS.  Additionally, convert it to an EC2 classad for output
   for line in sys.stdin:
      if line.strip() == delim:
         continue
      match = grep('^([^=]*)\s*=\s*(.*)$', line)
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip()
         lower_attr = attribute.lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if lower_attr == 'iwd':
            # Remove the IWD from the class ad so the execute directory
            # will be used
            iwd = value
            continue
         if lower_attr == 'amazonpublickey':
            if aws_public_key == '':
               user_aws_public_key = value
            continue
         if lower_attr == 'amazonprivatekey':
            if aws_private_key == '':
               user_aws_private_key = value
            continue
         if lower_attr == 'amazonaccesskey':
            if aws_key == '':
               user_aws_key = value
            continue
         if lower_attr == 'amazonsecretkey':
            if aws_secret == '':
               user_aws_secret = value
            continue
         if lower_attr == 'rsapublickey':
            if rsa_public_key == '':
               user_rsa_public_key = value
            continue
         if lower_attr == 'clusterid':
            cluster_id = value
            continue
         if lower_attr == 'procid':
            proc_id = value
            continue
         if lower_attr == 'qdate':
            qdate = value
            continue
         if lower_attr in skip_attribs:
            continue
         sqs_data.class_ad += str(line)

         if lower_attr == 'globaljobid':
            continue
         if lower_attr == 'ec2region':
            if region == '':
               region = value
         if lower_attr == 'jobuniverse':
            grid_classad += 'JobUniverse = 9\n'
            grid_classad += 'Remote_JobUniverse = ' + str(value) + '\n'
            continue
         if lower_attr in int_reset_attribs:
            grid_classad += attribute + ' = 0\n'
            continue
         if lower_attr in float_reset_attribs:
            grid_classad += attribute + ' = 0.0\n'
            continue
         if lower_attr == 'jobstatus':
            grid_classad += attribute + ' = 1\n'
            continue
         if lower_attr == 'exitbysignal':
            grid_classad += attribute + ' = FALSE\n'
            continue
         if lower_attr == 'shouldtransferfiles':
            create_sandbox = value.lower()
            grid_classad += attribute + ' = "NO"\n'
            continue
         if lower_attr == 'transferexecutable':
            transfer_exe = value.lower()
         if lower_attr == 'cmd' or lower_attr == 'command':
            executable = value
            grid_classad += 'Cmd = "EC2: %s: %s"\n' % (route_name, value)
            continue
      grid_classad += str(line)

   job_queue = '%s%s%s' % (cluster_id, proc_id, qdate)
   sqs_data.class_ad += 'AmazonFullSQSQueueName = "%s"\n' % job_queue
   if delay != '':
      sqs_data.class_ad += 'amazonamishutdowndelay = %s\n' % delay
   sqs_data.class_ad += 'WantAWS = False\n'
   grid_classad += 'AmazonFullSQSQueueName = "%s"\n' % job_queue

   # Search through the class ad and make modifications to the files/paths
   # as necessary
   new_ad = ''
   files = []
   for line in sqs_data.class_ad.split('\n'):
      match = grep('^([^=]*)\s*=\s*(.*)$', line)
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip()
         attr_lower = attribute.lower()
         value = match[1].strip()

         # Ignore files in /dev (like /dev/null)
         if grep('^"/dev/.*"', value) != None:
            continue

         # Remove quotes if they exist.  This is a string, so try to split on
         # the '/' character.  If a / exists, it's a file with a full path
         match = grep('^"(.*)"$', value)
         if match != None and match[0] != None:
            split_val = os.path.split(match[0])

         # Replace these attributes in the job class ad or the AMI instance
         # will fail.  Need to remove any reference to directories so all
         # files will be put in the temporary execute directory on the
         # machine executing the job
         if attr_lower in transfer_attribs and create_sandbox == 'yes':
            # Don't mess with the command if it won't be transferred to
            # the remote system.  This likely means the exe already exists
            # where the job will be executed
            if attr_lower == 'cmd' and transfer_exe == 'false':
               new_ad += line + '\n'
               continue
            if split_val[0] == '':
               files.append(iwd + '/' + match[0].strip() + '\n')
            elif os.path.exists(split_val[0]) == False:
               files.append(iwd + '/' + split_val[1] + '\n')
            else:
               files.append(match[0].strip() + '\n')
            new_ad += attribute + ' = "' + split_val[1].strip() + '"\n'
            continue

         # Set stdout/stderr files to be created in the sandbox so they
         # will be transferred back if they are actual files.
         if (attr_lower == 'err' or attr_lower == 'out') and \
            value != '/dev/null':
            new_ad += attribute + ' = "' + os.path.basename(split_val[1]) + '"\n'
            continue
            
         new_ad += line + '\n'
   sqs_data.class_ad = new_ad

   # Add user EC2 specific information to the grid class ad if not set by the
   # route
   if aws_public_key == '':
      if user_aws_public_key != '':
         grid_classad += 'AmazonPublicKey = "%s"\n' % str(user_aws_public_key)
      else:
         sys.stderr.write('Error: No Public Key defined by the job or the route\n')
         return(FAILURE)

   if aws_private_key == '':
      if user_aws_private_key != '':
         grid_classad += 'AmazonPrivateKey = "%s"\n' % str(user_aws_private_key)
      else:
         sys.stderr.write('Error: No Private Key defined by the job or the route\n')
         return(FAILURE)

   if aws_key == '':
      if user_aws_key != '':
         grid_classad += 'AmazonAccessKey = "%s"\n' % str(user_aws_key)
         aws_key_file = user_aws_key
      else:
         sys.stderr.write('Error: No Access Key defined by the job or the route\n')
         return(FAILURE)
   else:
      aws_key_file = aws_key

   if aws_secret == '':
      if user_aws_secret != '':
         grid_classad += 'AmazonSecretKey = "%s"\n' % str(user_aws_secret)
         aws_secret_file = user_aws_secret
      else:
         sys.stderr.write('Error: No Secret Key defined by the job or the route\n')
         return(FAILURE)
   else:
      aws_secret_file = aws_secret

   if rsa_public_key == '':
      if user_rsa_public_key != '':
         rsa_public_key_file = user_rsa_public_key
      else:
         sys.stderr.write('Error: No RSA public key defined by the job or the route\n')
         return(FAILURE)
   else:
      rsa_public_key_file = rsa_public_key

   sqs_data.class_ad += 'AmazonAccessKey = "%s"\n' % str(aws_key_file)
   sqs_data.class_ad += 'AmazonSecretKey = "%s"\n' % str(aws_secret_file)

   # Pull the specific keys out of the files
   if os.path.exists(rsa_public_key_file) == False:
      sys.stderr.write('Error: Unable to read RSA public key file\n')
      return(FAILURE)
   elif os.path.exists(aws_key_file) == False or \
      os.path.exists(aws_secret_file) == False:
      sys.stderr.write('Error: Unable to read AWS key files\n')
      return(FAILURE)
   else:
      try:
        key_file = open(aws_key_file, 'r')
        aws_key_val = key_file.readlines()[0].strip()
        key_file.close()
        key_file = open(aws_secret_file, 'r')
        aws_secret_val = key_file.readlines()[0].strip()
        key_file.close()
        key_file = open(rsa_public_key_file, 'r')
        key_file.close()
      except IOError, e:
        sys.stderr.write("Error:  Unable to open file")
        sys.stderr.write(str(e))
        return(FAILURE)
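
Example 5 fills in an SQSEntry that none of the snippets define. The only member written here is class_ad, and Example 4 reads msg.class_ad and msg.s3_key off the unpickled object, so a minimal stand-in could look like this:

class SQSEntry(object):
   # Container pickled into the SQS message body.  Only class_ad is
   # written in Example 5; the consumer side (Example 4) also reads
   # s3_key, so the real class presumably carries at least both.
   def __init__(self):
      self.class_ad = ''
      self.s3_key = None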
Example #6
def main(argv=None):
   if argv == None:
      argv = sys.argv

   bucket = ''
   key = ''
   queue_name = ''
   region = ''
   ret_val = SUCCESS
   aws_key = ''
   aws_secret = ''

   # Read the class ad from stdin and store the S3 information
   for line in sys.stdin:
      match = grep('^([^=]*)\s*=\s*(.*)$', line.strip())
      if match != None and match[0] != None and match[1] != None:
         attribute = match[0].strip().lower()
         val_match = grep('^"(.*)"$', match[1].strip())
         if val_match != None and val_match[0] != None:
            value = val_match[0].strip()
         else:
            value = match[1].strip()
         if attribute == 's3bucketid':
            bucket = value
            continue
         if attribute == 's3keyid':
            key = value
            continue
         if attribute == 'amazonaccesskey':
            aws_key = value
            continue
         if attribute == 'amazonsecretkey':
            aws_secret = value
            continue
         if attribute == 'amazonfullsqsqueuename':
            queue_name = value
            continue
         if attribute == 'ec2region':
            region = value


   # Pull the specific keys out of the files
   if os.path.exists(aws_key) == False or \
      os.path.exists(aws_secret) == False:
      sys.stderr.write('Error: Unable to read AWS key files\n')
      return(FAILURE)
   else:
      key_file = open(aws_key, 'r')
      aws_key_val = key_file.readlines()[0].strip()
      key_file.close()
      key_file = open(aws_secret, 'r')
      aws_secret_val = key_file.readlines()[0].strip()
      key_file.close()

   # Remove messages from SQS
   work_queue = None
   results_queue = None
   full_queue_name = '%s-%s' % (str(aws_key_val), queue_name)
   try:
      r_obj = AWSRegion.get_sqs_region(region)
      sqs_con = SQSConnection(aws_key_val, aws_secret_val, region=r_obj)
   except BotoServerError, error:
      sys.stderr.write('Error: Unable to connect to SQS: %s, %s\n' % (error.reason, error.body))
      return(FAILURE)
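
The snippet stops right after connecting, but the comment says the goal is removing messages from SQS. A plausible continuation of the function body, using only queue calls that appear in the other examples plus SQSConnection.delete_queue, which boto provides (the results_queue declared above would presumably get the same treatment):

   try:
      work_queue = sqs_con.get_queue(full_queue_name)
      if work_queue is not None:
         q_msg = work_queue.read()
         while q_msg is not None:
            work_queue.delete_message(q_msg)
            q_msg = work_queue.read()
         sqs_con.delete_queue(work_queue)
   except BotoServerError, error:
      sys.stderr.write('Error: Unable to remove SQS messages: %s, %s\n' % (error.reason, error.body))
      ret_val = FAILURE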
Example #7
                  return(FAILURE)
               continue
   
            # Grab the S3 bucket if it wasn't in the input classad
            if bucket == '':
               try:
                  bucket = q_msg.s3_bucket
               except:
                  # Message had no s3_bucket for some reason.
                  sys.stderr.write('Error: Message has no S3 bucket\n')
                  return(FAILURE)
   
            # Grab the S3 key if it wasn't defined already
            if key == '':
               if msg.s3_key == None:
                  s3_key = grep('^S3KeyID\s*=\s*"(.+)"$', msg.class_ad)
                  if s3_key == None or s3_key[0] == None:
                     s3_key = grep('^s3keyid\s*=\s*"(.+)"$', msg.class_ad)
                  if s3_key != None and s3_key[0] != None:
                     key = s3_key[0]
               else:
                  key = msg.s3_key
   
            # Delete the message.  There may be more so keep processing
            queue.delete_message(q_msg)
   
            q_msg = queue.read()

   # Access S3
   try:
      s3_con = S3Connection(aws_key_val, aws_secret_val)
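
Example 7 is also truncated at the S3 connection. Given that the code above collected a bucket and key and the hook is cleaning up after a job, the continuation plausibly deletes the sandbox object; get_bucket and delete_key are standard boto Bucket operations, though the author's actual cleanup steps are not shown:

      bucket_obj = s3_con.get_bucket(bucket)
      if key != '':
         # Remove the job's sandbox object from the bucket
         bucket_obj.delete_key(key)
   except BotoServerError, error:
      sys.stderr.write('Error accessing S3: %s, %s\n' % (error.reason, error.body))
      return(FAILURE)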