import os

from boto.s3.connection import S3Connection
from django.core.exceptions import ObjectDoesNotExist

# LogFile and parse_str are app-local: LogFile is the Django model that tracks
# which S3 keys have been parsed, and parse_str() ingests one file's contents.
# The import paths below are assumptions about this repo's layout.
from .models import LogFile
from .parser import parse_str


def parse_log_files():
    """Parse S3 log files that reside in an S3 bucket.

    The contents of the bucket named by S3_LOG_BUCKET are iterated over.
    Each file is tracked by a LogFile record; files already parsed (or
    locked by another worker) are skipped to prevent duplicate parsing.
    """
    aws_access_key = os.environ.get('S3_LOG_ACCESS_KEY')
    aws_secret_key = os.environ.get('S3_LOG_SECRET_KEY')
    bucket_name = os.environ.get('S3_LOG_BUCKET')
    prefix = os.environ.get('S3_LOG_PREFIX')

    conn = S3Connection(aws_access_key, aws_secret_key)
    bucket = conn.get_bucket(bucket_name)

    for key in bucket.list(prefix=prefix):
        try:
            log_file = LogFile.objects.get(key=key.key)
            if not log_file.parsed and not log_file.lock:
                # Take the lock so a concurrent run skips this file.
                log_file.lock = True
                log_file.save()
            else:
                continue
        except ObjectDoesNotExist:
            log_file = LogFile(key=key.key, parsed=False, lock=True)
            log_file.save()

        # get_contents_as_string() returns bytes; decode rather than str(),
        # which would produce a "b'...'" literal on Python 3.
        contents = key.get_contents_as_string().decode('utf-8')
        parse_str(contents)

        log_file.parsed = True
        log_file.lock = False
        log_file.save()
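
# If parse_log_files() dies between setting lock=True and clearing it, the
# file stays locked and every later run skips it. A minimal recovery sketch,
# assuming no worker is running when it is called; clear_stale_locks is a
# hypothetical helper, not part of the original module:
def clear_stale_locks():
    """Release locks on unparsed files so the next run can retry them."""
    LogFile.objects.filter(parsed=False, lock=True).update(lock=False)
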
def parse_log_files_locally(path_to_logs):
    """Parse S3 log files from a local directory.

    Intended to be run manually; it has none of the locking provisions
    that the normal task has.
    """
    for log in os.listdir(path_to_logs):
        key = "logs/" + log
        try:
            LogFile.objects.get(key=key)
        except ObjectDoesNotExist:
            # Only files without an existing LogFile record are parsed.
            with open(os.path.join(path_to_logs, log), 'r') as fh:
                contents = fh.read()
            print(key)
            parse_str(contents)
            log_file = LogFile(key=key, parsed=True, lock=False)
            log_file.save()
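
# A hypothetical re-ingest helper, sketched under the assumption that the log
# files were first synced from the bucket to a local directory (e.g. with
# `aws s3 sync`). It deletes the tracking records under the "logs/" prefix
# used above and re-runs the local parser, which can be useful when
# parse_str() changes and old logs must be ingested again. reparse_all_local
# is not part of the original module:
def reparse_all_local(path_to_logs):
    """Forget previously parsed local logs, then parse everything again."""
    LogFile.objects.filter(key__startswith="logs/").delete()
    parse_log_files_locally(path_to_logs)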