def get_flo(self):
    """Fetch this object's body from S3 and return it as a file-like object.

    Downloads s3://{self.bucket_name}/{self.key_name}; if the first
    get_object fails for any reason, retries once with an unquoted form
    of the key (S3 event notifications URL-encode key names).

    Returns:
        BytesIO wrapping the full object body.

    Raises:
        SigVerifyTooBig: if the object's ContentLength exceeds MAX_EXE_SIZE.
        Whatever the retry get_object raises, if the unquoted key also fails.
    """
    s3_client = boto3.client('s3')
    debug("in get_flo")
    try:
        result = s3_client.get_object(Bucket=self.bucket_name,
                                      Key=self.key_name)
    except Exception as e:
        # Record the first failure (truncated repr to keep logs bounded),
        # then fall through to the unquote-and-retry path below.
        text = repr(e)[:256]
        self.add_error("failed to process s3 object {}/{} '{}'".format(
            self.bucket_name, self.key_name, text))
        # issue #14 - the below decode majik is from AWS sample code.
        # NOTE(review): urllib.unquote_plus is Python 2 API; under Python 3
        # this would be urllib.parse.unquote_plus and encode('utf8') would
        # yield bytes — confirm target runtime before porting.
        new_key = urllib.unquote_plus(self.key_name.encode('utf8'))
        self.add_message("First get failed ({}), trying to unquote"
                         " ({})".format(self.key_name, new_key))
        # If this second attempt raises, the exception propagates to the
        # caller — only one retry is made.
        result = s3_client.get_object(Bucket=self.bucket_name, Key=new_key)
        self.add_message("get_object worked after unescaping")
    debug("after s3_client.get_object() result={}".format(type(result)))
    # Guard against pathologically large objects before reading the body
    # into memory.
    if result['ContentLength'] > MAX_EXE_SIZE:
        msg = """Too big: {}/{} {} ({})""".format(self.bucket_name,
                                                  self.key_name,
                                                  result['ContentLength'],
                                                  repr(result))
        print(msg)
        raise SigVerifyTooBig(msg)
    debug("before body read")
    # Read the entire body into memory so callers get a seekable stream.
    flo = BytesIO(result['Body'].read())
    debug("after read() flo={}".format(type(flo)))
    return flo
def lambda_handler(event, context):
    """Convert an RSS feed's articles to MP3 via Polly and publish a podcast.

    Reads ``event['rss']`` (feed URL) and ``event['bucket']`` (S3 bucket
    name), synthesizes speech for each new feed entry, stores the MP3s in
    the bucket, and writes an updated ``podcast.xml`` RSS file describing
    every entry (new and pre-existing).

    Args:
        event: Lambda event dict with keys 'rss' and 'bucket'.
        context: Lambda context object (unused).
    """
    rss = event['rss']
    bucket_name = event['bucket']
    logging.info("Processing url: %s" % rss)
    logging.info("Using bucket: %s" % bucket_name)
    # session = boto3.Config(region_name="us-west-2")
    polly = boto3.client("polly")
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    logging.info("getting list of existing objects in the given bucket")
    # Snapshot existing keys once so per-entry membership tests are O(1).
    files = set(o.key for o in bucket.objects.all())
    feed = feedparser.parse(rss)
    title = feed['feed']['title']
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title('Audio podcast based on: %s' % title)
    fg.link(href=feed.feed.link, rel='alternate')
    fg.subtitle(feed.feed.description)
    # BUG FIX: the template previously lacked a {filename} placeholder, so
    # str.format silently ignored the filename= kwarg and every enclosure
    # URL was malformed. The placeholder makes the passed kwargs effective.
    ENTRY_URL = "http://s3-{region}.amazonaws.com/{bucket}/{filename}"
    for entry in get_entries(feed):
        filename = "%s.mp3" % entry['id']
        fe = fg.add_entry()
        fe.id(entry['id'])
        fe.title(entry['title'])
        fe.published(entry['published'])
        entry_url = ENTRY_URL.format(
            bucket=bucket_name,
            filename=filename,
            region=os.environ["AWS_REGION_BUCKET"]
        )
        # Enclosure length 0: actual byte size is unknown before synthesis.
        fe.enclosure(entry_url, 0, 'audio/mpeg')
        # Feed entry is always added to the XML; synthesis is skipped only
        # when the MP3 already exists in the bucket.
        if filename in files:
            logging.info('Article "%s" with id %s already exist, skipping.'
                         % (entry['title'], entry['id']))
            continue
        try:
            logging.info("Next entry, size: %d" % len(entry['content']))
            logging.debug("Content: %s" % entry['content'])
            response = polly.synthesize_speech(
                Text=entry['content'],
                OutputFormat="mp3",
                VoiceId="Joanna"
            )
            # closing() guarantees the streaming body is released even if
            # put_object raises.
            with closing(response["AudioStream"]) as stream:
                bucket.put_object(Key=filename, Body=stream.read())
        except BotoCoreError as error:
            # Best-effort: a failed synthesis/upload is logged but does not
            # abort the rest of the feed.
            logging.error(error)
    bucket.put_object(Key='podcast.xml', Body=fg.rss_str(pretty=True))
def send_sns(self, msg, e=None, reraise=False):
    """Publish *msg* to the SNS topic named by the SNSARN env var.

    The first line of *msg* becomes the subject, heuristically shortened
    to fit AWS's ~100-char subject limit. The bucket/key context and, when
    *e* is given, the current traceback are appended to the message body.

    Args:
        msg: message text; first line doubles as the subject.
        e: optional exception; when truthy the active traceback is appended.
        reraise: accepted but not consulted in this body — TODO confirm
            intended semantics against callers.

    Raises:
        KeyError: when SNSARN is missing from the environment.
    """
    # use first line of incoming msg as subject, but AWS limit is under 100
    # chars.
    # ASSUME anything over is due to long s3 URL and use heuristic
    subject = msg.split('\n')[0]
    if len(subject) >= 100:
        # split assuming URL, then retain result (index 0) and file name
        # (index -1). File name should be sufficient to allow page
        # recipient to decide urgency of further investigation.
        pieces = subject.split('/')
        subject = "{} ... {}".format(pieces[0], pieces[-1])
        if len(subject) >= 100:
            # don't try to be smarter, full text is still in 'msg'
            subject = "Truncated subject, examine message"
    # append bucket & key, short key first
    msg += "\n{}\nkey={}\nbucket={}".format(
        os.path.basename(self.key_name),
        self.key_name,
        self.bucket_name)
    # hack to get traceback in email
    if e:
        import traceback
        msg += traceback.format_exc()
    client = boto3.client("sns")
    # keep a global to prevent infinite recursion on arn error
    global topic_arn
    topic_arn = os.environ.get('SNSARN', "")
    if self.verbose:
        print("snsarn: {}".format(topic_arn))
    if not topic_arn:
        # bad config, we expected this in the environ
        # set flag so we don't re-raise
        topic_arn = "no-topic-arn"
        raise KeyError("Missing 'SNSARN' from environment")
    try:
        # if the publish fails, we still want to continue, so we get the
        # details into the cloud watch logs. Otherwise, this can
        # (sometimes) terminate the lambda causing retries & DLQ
        response = client.publish(Message=msg, Subject=subject,
                                  TopicArn=topic_arn)
        debug("sns publish: '{}'".format(response))
    except Exception as e:
        # NOTE: this rebinds the parameter 'e'; failure details are logged
        # via add_message rather than re-raised.
        self.add_message("sns publish failed\n"
                         "      msg ({}): '{}'\n"
                         "     subj ({}): '{}'\n"
                         "exception: '{}'"
                         "".format(len(msg), str(msg),
                                   len(subject), str(subject),
                                   str(e)))
def send_sns(self, msg, e=None, reraise=False):
    """Publish *msg* to the SNS topic configured via the SNSARN env var.

    When *e* is truthy, the active traceback is appended to the message
    so it shows up in the notification email. The module-level
    ``topic_arn`` global is (re)set on every call as a guard against
    infinite recursion when the ARN itself is the problem.

    Raises:
        KeyError: when SNSARN is absent from the environment.
    """
    global topic_arn

    if e:
        # hack to get traceback in email
        import traceback
        msg = msg + traceback.format_exc()

    sns_client = boto3.client("sns")
    # keep a global to prevent infinite recursion on arn error
    topic_arn = os.environ.get('SNSARN', "")
    if self.verbose:
        print("snsarn: {}".format(topic_arn))

    if not topic_arn:
        # bad config, we expected this in the environ
        # set flag so we don't re-raise
        topic_arn = "no-topic-arn"
        raise KeyError("Missing 'SNSARN' from environment")

    # publish response intentionally discarded
    sns_client.publish(Message=msg, TopicArn=topic_arn)
def get_flo(self):
    """Wait for this object to exist in S3, fetch it, and return its body.

    Uses an ``object_exists`` waiter (3 attempts, 3s apart — ~9s max,
    deliberately shorter than the 100s boto3 default) before calling
    get_object, then wraps the full body in a BytesIO.

    Side effects:
        Sets ``self.s3_wait_time`` to the elapsed wait+fetch seconds and
        ``self.had_s3_error`` to True on failure.

    Returns:
        BytesIO wrapping the object body.

    Raises:
        SigVerifyTooBig: if ContentLength exceeds MAX_EXE_SIZE.
        Any exception from the waiter or get_object (re-raised after
        being recorded via add_error).
    """
    s3_client = boto3.client('s3')
    debug("in get_flo")
    try:
        # Make sure the object is really available taken from
        # https://blog.rackspace.com/the-devnull-s3-bucket-hacking-with-aws-lambda-and-python
        # Don't use defaults, though -- that's 100 sec during testing!
        start_waiting = time.time()
        waiter = s3_client.get_waiter('object_exists')
        waiter.wait(Bucket=self.bucket_name, Key=self.key_name,
                    WaiterConfig={
                        'Delay': 3,
                        'MaxAttempts': 3
                    })
        result = s3_client.get_object(Bucket=self.bucket_name,
                                      Key=self.key_name)
    except Exception as e:
        debug("s3 exceptions type: {}".format(type(e)))
        # Record the failure for later reporting, then propagate.
        self.had_s3_error = True
        text = repr(e)[:256]
        self.add_error("failed to process s3 object {}/{} '{}'".format(
            self.bucket_name, self.key_name, text))
        raise
    finally:
        # Always record elapsed time, success or failure. start_waiting is
        # the first statement in the try block, so it is always bound here.
        self.s3_wait_time = time.time() - start_waiting
    debug("after s3_client.get_object() result={}".format(type(result)))
    # Guard against pathologically large objects before reading the body
    # into memory.
    if result['ContentLength'] > MAX_EXE_SIZE:
        msg = """Too big: {}/{} {} ({})""".format(self.bucket_name,
                                                  self.key_name,
                                                  result['ContentLength'],
                                                  repr(result))
        print(msg)
        raise SigVerifyTooBig(msg)
    debug("before body read")
    # Read the entire body into memory so callers get a seekable stream.
    flo = BytesIO(result['Body'].read())
    debug("after read() flo={}".format(type(flo)))
    return flo
def get_user():
    """Return the masked IAM user record, JSON-serialized via json_serial."""
    # This function doesn't have to be decorated, because the API call to IAM
    # will be traced thanks to the monkey-patching.
    iam_client = boto3.client('iam')
    user_record = iam_client.get_user(UserName="******")
    return dumps(user_record, default=json_serial)