def get_bucket():
    """Return the boto3 S3 ``Bucket`` named by ``FEC_DOWNLOAD_BUCKET``.

    The boto3 session is created from the ``FEC_DOWNLOAD_ACCESS_KEY`` and
    ``FEC_DOWNLOAD_SECRET_KEY`` credentials.
    """
    credentials = {
        "aws_access_key_id": env.get_credential("FEC_DOWNLOAD_ACCESS_KEY"),
        "aws_secret_access_key": env.get_credential("FEC_DOWNLOAD_SECRET_KEY"),
    }
    resource = boto3.Session(**credentials).resource("s3")
    return resource.Bucket(env.get_credential("FEC_DOWNLOAD_BUCKET"))
def add_caching_headers(response):
    """Add a ``Cache-Control`` header and optionally mirror the response to S3.

    When the ``FEC_CACHE_AGE`` credential is set, a public ``max-age`` header
    is added. When ``CACHE_ALL_REQUESTS`` is truthy and the response status is
    200, the JSON payload is written under ``cached-calls/`` in the bucket
    returned by ``utils.get_bucket()``.

    The upload is best effort: a failure is logged (with traceback) and the
    response is still returned unchanged.
    """
    max_age = env.get_credential('FEC_CACHE_AGE')
    cache_all_requests = env.get_credential('CACHE_ALL_REQUESTS', False)
    status_code = response.status_code

    if max_age is not None:
        response.headers.add('Cache-Control', 'public, max-age={}'.format(max_age))

    if cache_all_requests and status_code == 200:
        try:
            # convert the results to JSON
            json_data = utils.get_json_data(response)
            # format the URL by removing the api_key and special characters
            formatted_url = utils.format_url(request.url)
            # get s3 bucket env variables
            s3_bucket = utils.get_bucket()
            cached_url = "s3://{0}/cached-calls/{1}.json".format(s3_bucket.name, formatted_url)
            s3_key = utils.get_s3_key(cached_url)

            # upload the request_content.json file to s3 bucket
            with smart_open(s3_key, 'wb') as cached_file:
                cached_file.write(json_data)

            logger.info('The following request has been cached and uploaded successfully :%s ', cached_url)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate, and keep the
            # traceback so upload failures can be diagnosed.
            logger.exception('Cache Upload failed')
    return response
def get_bucket():
    """Return the S3 bucket named by the ``bucket`` credential.

    The boto3 session is created from the ``access_key_id`` and
    ``secret_access_key`` credentials.
    """
    key_id = env.get_credential('access_key_id')
    secret = env.get_credential('secret_access_key')
    session = boto3.Session(
        aws_access_key_id=key_id,
        aws_secret_access_key=secret,
    )
    return session.resource('s3').Bucket(env.get_credential('bucket'))
def send_mail(buffer):
    """Send the contents of ``buffer`` as a plain-text email through Mandrill.

    ``buffer`` must expose ``getvalue()``. Sender and recipients come from
    the ``FEC_EMAIL_SENDER`` and ``FEC_EMAIL_RECIPIENTS`` credentials.
    """
    client = mandrill.Mandrill(env.get_credential('MANDRILL_API_KEY'))
    message = {
        'text': buffer.getvalue(),
        'subject': get_subject(env.app),
        'from_email': env.get_credential('FEC_EMAIL_SENDER'),
        'to': get_recipients(env.get_credential('FEC_EMAIL_RECIPIENTS')),
    }
    # ``async`` is a reserved word since Python 3.7, so ``async=False`` is a
    # SyntaxError. The flag is the second positional parameter of
    # ``messages.send`` (named ``async`` in older client releases and
    # ``asynchronous`` in newer ones), so pass it positionally.
    client.messages.send(message, False)
def get_s3_key(name):
    """Return a boto ``Key`` called ``name`` in the configured bucket.

    Region, access keys and bucket name are read from the ``region``,
    ``access_key_id``, ``secret_access_key`` and ``bucket`` credentials.
    """
    region = env.get_credential('region')
    key_id = env.get_credential('access_key_id')
    secret = env.get_credential('secret_access_key')
    connection = boto.s3.connect_to_region(
        region,
        aws_access_key_id=key_id,
        aws_secret_access_key=secret,
    )
    target_bucket = connection.get_bucket(env.get_credential('bucket'))
    return Key(bucket=target_bucket, name=name)
def notify():
    """Post a deployment notice to Slack.

    The deploying user and branch are read from the ``DEPLOY_USER`` and
    ``DEPLOY_BRANCH`` environment variables.
    """
    client = Slacker(env.get_credential('FEC_SLACK_TOKEN'))
    channel = env.get_credential('FEC_SLACK_CHANNEL', '#fec')
    template = 'deploying branch {branch} of app {name} to space {space} by {user}'
    text = template.format(
        name=env.name,
        space=env.space,
        user=os.getenv('DEPLOY_USER'),
        branch=os.getenv('DEPLOY_BRANCH'),
    )
    bot_name = env.get_credential('FEC_SLACK_BOT', 'fec-bot')
    client.chat.post_message(channel, text, username=bot_name)
def load_current_murs():
    """Index every row returned by ``ALL_MURS`` as a current MUR document.

    Each document is written to the ``docs`` index with type ``murs`` and
    id ``mur_<case_no>``.
    """
    es = get_elasticsearch_connection()
    bucket = get_bucket()
    bucket_name = env.get_credential('bucket')

    with db.engine.connect() as conn:
        for row in conn.execute(ALL_MURS):
            case_id = row['case_id']
            case_no = row['case_no']
            doc_id = 'mur_%s' % case_no
            case_name = row['name']

            # Related data is fetched in the same order as before.
            subject = {"text": get_subjects(case_id)}
            participants = get_participants(case_id)
            disposition = get_disposition(case_id)
            text, documents = get_documents(case_id, bucket, bucket_name)
            open_date, close_date = get_open_and_close_dates(case_id)

            mur = {
                'doc_id': doc_id,
                'no': case_no,
                'name': case_name,
                'mur_type': 'current',
                'subject': subject,
                'participants': list(participants.values()),
                'disposition': disposition,
                'text': text,
                'documents': documents,
                'open_date': open_date,
                'close_date': close_date,
                'url': '/legal/matter-under-review/%s/' % case_no,
            }
            es.index('docs', 'murs', mur, id=doc_id)
def initialize_newrelic():
    """Start the New Relic agent when ``NEW_RELIC_LICENSE_KEY`` is set."""
    license_key = env.get_credential('NEW_RELIC_LICENSE_KEY')
    if not license_key:
        return

    # Imported lazily so the agent is only loaded when a key is configured.
    import newrelic.agent

    newrelic.agent.global_settings().license_key = license_key
    newrelic.agent.initialize()
def get_single_mur(mur_no):
    """Build and return the document dict for the MUR numbered ``mur_no``.

    The row is read with the ``SINGLE_MUR`` query; related data comes from
    the ``get_*`` helpers, each keyed by the row's ``case_id``.
    """
    bucket = get_bucket()
    bucket_name = env.get_credential('bucket')

    with db.engine.connect() as conn:
        row = conn.execute(SINGLE_MUR, mur_no).fetchone()
        case_id = row['case_id']
        case_no = row['case_no']
        sort1, sort2 = get_sort_fields(case_no)
        case_name = row['name']

        # Helpers are called in the same order as before.
        subjects = get_subjects(case_id)
        election_cycles = get_election_cycles(case_id)
        participant_list = list(get_participants(case_id).values())
        respondents = get_sorted_respondents(participant_list)
        commission_votes = get_commission_votes(case_id)
        dispositions = get_dispositions(case_id)
        documents = get_documents(case_id, bucket, bucket_name)
        open_date, close_date = get_open_and_close_dates(case_id)

        return {
            'doc_id': 'mur_%s' % case_no,
            'no': case_no,
            'name': case_name,
            'mur_type': 'current',
            'sort1': sort1,
            'sort2': sort2,
            'subjects': subjects,
            'election_cycles': election_cycles,
            'participants': participant_list,
            'respondents': respondents,
            'commission_votes': commission_votes,
            'dispositions': dispositions,
            'documents': documents,
            'open_date': open_date,
            'close_date': close_date,
            'url': '/legal/matter-under-review/%s/' % case_no,
        }
def load_advisory_opinions_into_s3():
    """Upload advisory opinion PDFs that are in the database but not in S3.

    Document ids in the ``document`` table are compared with the keys
    already stored under ``legal/aos/``; every missing document is uploaded
    as ``legal/aos/<document_id>.pdf`` with a public-read ACL. Does nothing
    when ``legal_loaded()`` is false.
    """
    if not legal_loaded():
        return

    docs_in_db = {
        str(r[0])
        for r in db.engine.execute("select document_id from document").fetchall()
    }

    bucket = get_bucket()
    # Raw string: "\." in a normal string literal is an invalid escape.
    key_pattern = re.compile(r"legal/aos/([0-9]+)\.pdf")
    docs_in_s3 = set()
    for obj in bucket.objects.filter(Prefix="legal/aos"):
        # The prefix listing can contain keys that are not "<id>.pdf"
        # (for example a folder placeholder); skip them instead of failing
        # on ``None.group``.
        match = key_pattern.match(obj.key)
        if match:
            docs_in_s3.add(match.group(1))

    new_docs = docs_in_db.difference(docs_in_s3)
    if not new_docs:
        print("No new advisory opinions found.")
        return

    # The ids come from the database itself, not from user input.
    query = (
        "select document_id, fileimage from document "
        "where document_id in (%s)" % ','.join(sorted(new_docs))
    )
    bucket_name = env.get_credential('bucket')

    # The context manager returns the streaming connection to the pool once
    # all rows are consumed (it was previously never closed).
    with db.engine.connect() as conn:
        result = conn.execution_options(stream_results=True).execute(query)
        for i, (document_id, fileimage) in enumerate(result):
            key = "legal/aos/%s.pdf" % document_id
            bucket.put_object(
                Key=key,
                Body=bytes(fileimage),
                ContentType='application/pdf',
                ACL='public-read',
            )
            url = "https://%s.s3.amazonaws.com/%s" % (bucket_name, key)
            print("pdf written to %s" % url)
            print("%d of %d advisory opinions written to s3" % (i + 1, len(new_docs)))
def index_regulations():
    """Index every regulation section served by ``FEC_EREGS_API``.

    Sections are written to the ``docs`` index with type ``regulations``.
    Prints a message and does nothing else when the credential is empty.
    """
    eregs_api = env.get_credential('FEC_EREGS_API', '')
    if not eregs_api:
        print("Regs could not be indexed, environment variable not set.")
        return

    versions = requests.get(eregs_api + 'regulation').json()['versions']
    es = utils.get_elasticsearch_connection()
    reg_count = 0

    for reg in versions:
        url = '%sregulation/%s/%s' % (eregs_api, reg['regulation'], reg['version'])
        sections = get_sections(requests.get(url).json())

        print("Loading part %s" % reg['regulation'])
        for label in sections:
            part, number = label[0], label[1]
            section = sections[label]
            no = '%s.%s' % (part, number)
            doc_id = '%s_%s' % (part, number)
            reg_url = '/regulations/{0}/{1}#{0}'.format(
                '%s-%s' % (part, number), reg['version'])
            # The display name is the text after the section number in the title.
            name = section['title'].split(no)[1].strip()
            doc = {
                "doc_id": doc_id,
                "name": name,
                "text": section['text'],
                'url': reg_url,
                "no": no,
            }
            es.index('docs', 'regulations', doc, id=doc_id)
        reg_count += 1

    print("%d regulation parts indexed." % reg_count)
def sqla_conn_string():
    """Return the ``SQLA_CONN`` credential, or the local ``cfdm_test`` URL.

    A notice is printed when the fallback is used.
    """
    configured = env.get_credential('SQLA_CONN')
    if configured:
        return configured

    print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`")
    return 'postgresql://:@/cfdm_test'
def sqla_conn_string():
    """Return the ``SQLA_CONN`` credential, or the local ``cfdm_test`` URL.

    A notice and the fallback URL are printed when ``SQLA_CONN`` is empty.
    A configured connection string is never printed: it may embed a database
    password, and stdout typically ends up in shared logs.
    """
    configured = env.get_credential('SQLA_CONN')
    if configured:
        return configured

    fallback = 'postgresql://:@/cfdm_test'
    print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`")
    # The fallback URL contains no credentials, so it is safe to echo.
    print(fallback)
    return fallback
def notify(ctx):
    """Post a deployment notice to Slack.

    The deploying user and branch are read from the JSON file ``.cfmeta``;
    when the file cannot be opened both values are reported as ``None``.
    ``ctx`` is not used by this function.
    """
    try:
        # Use a context manager so the file handle is closed even when the
        # JSON cannot be parsed.
        with open('.cfmeta') as meta_file:
            meta = json.load(meta_file)
    except OSError:
        meta = {}

    slack = Slacker(env.get_credential('FEC_SLACK_TOKEN'))
    slack.chat.post_message(
        env.get_credential('FEC_SLACK_CHANNEL', '#fec'),
        'deploying branch {branch} of app {name} to space {space} by {user}'.format(
            name=env.name,
            space=env.space,
            user=meta.get('user'),
            branch=meta.get('branch'),
        ),
        username=env.get_credential('FEC_SLACK_BOT', 'fec-bot'),
    )
def get_murs(from_mur_no):
    """Yield a document dict for each row returned by ``ALL_MURS``.

    ``from_mur_no`` selects the starting serial: ``None`` starts at 0,
    otherwise the ``serial`` group matched by ``MUR_NO_REGEX`` is used.
    """
    bucket = get_bucket()
    bucket_name = env.get_credential('bucket')

    start_mur_serial = (
        0 if from_mur_no is None
        else int(MUR_NO_REGEX.match(from_mur_no).group('serial'))
    )

    with db.engine.connect() as conn:
        for row in conn.execute(ALL_MURS, start_mur_serial):
            case_id = row['case_id']
            case_no = row['case_no']
            sort1, sort2 = get_sort_fields(case_no)
            case_name = row['name']

            # Helpers are called in the same order as before.
            subjects = get_subjects(case_id)
            election_cycles = get_election_cycles(case_id)
            participant_list = list(get_participants(case_id).values())
            respondents = get_sorted_respondents(participant_list)
            commission_votes = get_commission_votes(case_id)
            dispositions = get_dispositions(case_id)
            documents = get_documents(case_id, bucket, bucket_name)
            open_date, close_date = get_open_and_close_dates(case_id)

            yield {
                'doc_id': 'mur_%s' % case_no,
                'no': case_no,
                'name': case_name,
                'mur_type': 'current',
                'sort1': sort1,
                'sort2': sort2,
                'subjects': subjects,
                'election_cycles': election_cycles,
                'participants': participant_list,
                'respondents': respondents,
                'commission_votes': commission_votes,
                'dispositions': dispositions,
                'documents': documents,
                'open_date': open_date,
                'close_date': close_date,
                'url': '/legal/matter-under-review/%s/' % case_no,
            }
def refresh():
    """Update incremental aggregates, itemized schedules, materialized views,
    then slack a notification to the development team.

    Any exception raised by the refresh steps is logged once with its
    traceback and reported to the ``#bots`` channel; it is not re-raised.
    """
    manage.logger.info('Starting nightly refresh...')
    try:
        manage.refresh_itemized()
        manage.refresh_materialized()
        download.clear_bucket()
        slack_message = '*Success* nightly updates for {0} completed'.format(
            env.get_credential('NEW_RELIC_APP_NAME'))
        utils.post_to_slack(slack_message, '#bots')
        manage.logger.info(slack_message)
    except Exception as error:
        # Log before notifying Slack so the traceback is recorded even if
        # the notification itself fails. It used to be logged twice.
        manage.logger.exception(error)
        slack_message = '*ERROR* nightly update failed for {0}. Check logs.'.format(
            env.get_credential('NEW_RELIC_APP_NAME'))
        utils.post_to_slack(slack_message, '#bots')
def configure_backup_repository(repository=BACKUP_REPOSITORY_NAME):
    '''
    Register ``repository`` as an S3 snapshot repository in Elasticsearch.

    Bucket, region and keys are read from the API credentials, so this must
    be re-run for each API deployment whenever the S3 credentials change.
    '''
    es = utils.get_elasticsearch_connection()
    logger.info("Configuring backup repository: {0}".format(repository))

    s3_settings = {
        'bucket': env.get_credential("bucket"),
        'region': env.get_credential("region"),
        'access_key': env.get_credential("access_key_id"),
        'secret_key': env.get_credential("secret_access_key"),
        'base_path': BACKUP_DIRECTORY,
    }
    es.snapshot.create_repository(
        repository=repository,
        body={'type': 's3', 'settings': s3_settings},
    )
def process_mur(mur):
    """Parse one archived MUR table row, process its PDF and index it.

    ``mur`` is read positionally: ``mur[0]`` and ``mur[1]`` are the progress
    counters used in the log line, and ``mur[2]`` is HTML expected to contain
    exactly six ``<td>`` cells (MUR number, open date, close date, parties,
    subject, citations).

    Returns ``None``. A MUR whose PDF key already exists in the bucket is
    skipped without being re-indexed.
    """
    logger.info("processing mur %d of %d" % (mur[0], mur[1]))
    es = utils.get_elasticsearch_connection()
    bucket = get_bucket()
    bucket_name = env.get_credential('bucket')
    mur_names = get_mur_names()

    (mur_no_td, open_date_td, close_date_td, parties_td, subject_td, citations_td) = \
        re.findall(r"<td[^>]*>(.*?)</td>", mur[2], re.S)
    # Raw strings: "\." and "\(" are invalid escapes in normal string literals.
    mur_no = re.search(r"/disclosure_data/mur/([0-9_A-Z]+)\.pdf", mur_no_td).group(1)
    logger.info("processing mur %s" % mur_no)

    pdf_key = 'legal/murs/%s.pdf' % mur_no
    # Only existence matters, so stop at the first listed object instead of
    # materializing the whole listing.
    if next(iter(bucket.objects.filter(Prefix=pdf_key)), None) is not None:
        logger.info('already processed %s' % pdf_key)
        return

    text, pdf_size, pdf_pages = process_mur_pdf(mur_no, pdf_key, bucket)
    pdf_url = generate_aws_s3_url(bucket_name, pdf_key)

    open_date, close_date = (None, None)
    if open_date_td:
        open_date = datetime.strptime(open_date_td, '%m/%d/%Y').isoformat()
    if close_date_td:
        close_date = datetime.strptime(close_date_td, '%m/%d/%Y').isoformat()

    complainants = []
    respondents = []
    for party in re.findall(r"(.*?)<br>", parties_td):
        match = re.match(r"\(([RC])\) - (.*)", party)
        if match is None:
            # A row that is not "(R) - name" / "(C) - name" used to raise
            # AttributeError and abort the whole document.
            logger.warning('skipping unrecognized party %r in mur %s', party, mur_no)
            continue
        name = match.group(2).strip().title()
        if match.group(1) == 'C':
            complainants.append(name)
        if match.group(1) == 'R':
            respondents.append(name)

    subject = get_subject_tree(subject_td)
    citations = get_citations(re.findall(r"(.*?)<br>", citations_td))

    mur_digits = re.match(r"([0-9]+)", mur_no).group(1)
    name = mur_names[mur_digits] if mur_digits in mur_names else ''
    doc = {
        'doc_id': 'mur_%s' % mur_no,
        'no': mur_no,
        'name': name,
        'text': text,
        'mur_type': 'archived',
        'pdf_size': pdf_size,
        'pdf_pages': pdf_pages,
        'open_date': open_date,
        'close_date': close_date,
        'complainants': complainants,
        'respondents': respondents,
        'subject': subject,
        'citations': citations,
        'url': pdf_url
    }
    es.index(DOCS_INDEX, 'murs', doc, id=doc['doc_id'])
def post_to_slack(message, channel):
    """POST ``message`` to ``channel`` through the ``SLACK_HOOK`` webhook.

    A non-200 response is logged as an error; nothing is raised or returned.
    """
    payload = {
        'text': message,
        'channel': channel,
        'link_names': 1,
        'username': '******',
        'icon_emoji': ':robot_face:',
    }
    hook_url = env.get_credential('SLACK_HOOK')
    response = requests.post(
        hook_url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
    )
    if response.status_code == 200:
        return
    logger.error('SLACK ERROR- Message failed to send:{0}'.format(message))
def redis_url():
    """
    Return the URL used to connect to Redis.

    When ``env.space`` is set (a cloud.gov environment), the ``uri``
    credential of the ``redis32`` service is returned. Otherwise the
    ``FEC_REDIS_URL`` credential is used, defaulting to a local instance.
    """
    # Not running in a cloud.gov environment: use the configured/local URL.
    if env.space is None:
        return env.get_credential('FEC_REDIS_URL', 'redis://localhost:6379/0')

    service = env.get_service(label='redis32')
    return service.credentials.get('uri')
def index_regulations():
    """
    Index the regulation sections served by ``FEC_EREGS_API`` in Elasticsearch.

    Sections are written to ``DOCS_INDEX`` with type ``regulations``. Logs an
    error and returns early when the credential is not set.
    """
    eregs_api = env.get_credential('FEC_EREGS_API', '')
    if not eregs_api:
        logger.error(
            "Regs could not be indexed, environment variable FEC_EREGS_API not set."
        )
        return

    logger.info("Indexing regulations")
    versions = requests.get(eregs_api + 'regulation').json()['versions']
    es = utils.get_elasticsearch_connection()
    reg_count = 0

    for reg in versions:
        url = '%sregulation/%s/%s' % (eregs_api, reg['regulation'], reg['version'])
        sections = get_sections(requests.get(url).json())

        logger.debug("Loading part %s" % reg['regulation'])
        for label in sections:
            part, number = label[0], label[1]
            section = sections[label]
            doc_id = '%s_%s' % (part, number)
            reg_url = '/regulations/{0}/{1}#{0}'.format(
                '%s-%s' % (part, number), reg['version'])
            no = '%s.%s' % (part, number)
            # The display name is the text after the section number in the title.
            name = section['title'].split(no)[1].strip()
            doc = {
                "doc_id": doc_id,
                "name": name,
                "text": section['text'],
                "url": reg_url,
                "no": no,
                "sort1": int(part),
                "sort2": int(number)
            }
            es.index(DOCS_INDEX, 'regulations', doc, id=doc_id)
        reg_count += 1

    logger.info("%d regulation parts indexed", reg_count)
def index_advisory_opinions():
    """Index every DOCUMENT row joined to its AO as an advisory opinion.

    Documents go to the ``docs`` index with type ``advisory_opinions``.
    Progress is printed every 500 documents. Nothing is indexed when
    ``legal_loaded()`` is false.
    """
    print('Indexing advisory opinions...')
    if not legal_loaded():
        return

    ao_count = db.engine.execute('select count(*) from AO').fetchone()[0]
    print('AO count: %d' % ao_count)
    doc_count = db.engine.execute(
        'select count(*) from DOCUMENT').fetchone()[0]
    print('DOC count: %d' % doc_count)

    es = utils.get_elasticsearch_connection()

    result = db.engine.execute("""select DOCUMENT_ID, OCRTEXT, DESCRIPTION,
                            CATEGORY, DOCUMENT.AO_ID, NAME, SUMMARY,
                            TAGS, AO_NO, DOCUMENT_DATE FROM DOCUMENT INNER JOIN
                            AO on AO.AO_ID = DOCUMENT.AO_ID""")

    bucket_name = env.get_credential('bucket')
    docs_loaded = 0
    for docs_loaded, row in enumerate(result, start=1):
        pdf_key = "legal/aos/%s.pdf" % row[0]
        doc = {
            "doc_id": row[0],
            "text": row[1],
            "description": row[2],
            "category": row[3],
            "id": row[4],
            "name": row[5],
            "summary": row[6],
            "tags": row[7],
            "no": row[8],
            "date": row[9],
            "url": "https://%s.s3.amazonaws.com/%s" % (bucket_name, pdf_key),
        }
        es.index('docs', 'advisory_opinions', doc, id=doc['doc_id'])

        if docs_loaded % 500 == 0:
            print("%d docs loaded" % docs_loaded)

    print("%d docs loaded" % docs_loaded)
def index_regulations():
    """
    Index the regulation sections served by ``FEC_EREGS_API`` in Elasticsearch.

    Sections are written to the ``docs_index`` index with type
    ``regulations``. Logs an error and returns early when the credential is
    not set.
    """
    eregs_api = env.get_credential('FEC_EREGS_API', '')
    if not eregs_api:
        logger.error("Regs could not be indexed, environment variable FEC_EREGS_API not set.")
        return

    logger.info("Indexing regulations")
    versions = requests.get(eregs_api + 'regulation').json()['versions']
    es = utils.get_elasticsearch_connection()
    reg_count = 0

    for reg in versions:
        url = '%sregulation/%s/%s' % (eregs_api, reg['regulation'], reg['version'])
        sections = get_sections(requests.get(url).json())

        logger.debug("Loading part %s" % reg['regulation'])
        for label in sections:
            part, number = label[0], label[1]
            section = sections[label]
            doc_id = '%s_%s' % (part, number)
            reg_url = '/regulations/{0}/{1}#{0}'.format(
                '%s-%s' % (part, number), reg['version'])
            no = '%s.%s' % (part, number)
            # The display name is the text after the section number in the title.
            name = section['title'].split(no)[1].strip()
            doc = {
                "doc_id": doc_id,
                "name": name,
                "text": section['text'],
                "url": reg_url,
                "no": no,
                "sort1": int(part),
                "sort2": int(number)
            }
            es.index('docs_index', 'regulations', doc, id=doc_id)
        reg_count += 1

    logger.info("%d regulation parts indexed", reg_count)
def redis_url():
    """
    Retrieves the URL needed to connect to a Redis instance.

    When ``env.space`` is set (a cloud.gov environment), the ``redis32``
    service is looked up, retrying until it is available, and a
    ``redis://`` URL is built from it. Otherwise the ``FEC_REDIS_URL``
    credential is returned, defaulting to a localhost connection.
    """
    # Local import keeps this block self-contained.
    import time

    # env.space is None when the app is not running in a cloud.gov space.
    if env.space is not None:
        logger.info(
            'Running in the {0} space in cloud.gov.'.format(env.space)
        )

        # While we are not able to connect to Redis, retry as many times as
        # necessary.  This is usually due to a brief 1 - 3 second downtime as
        # a service instance is rebooted in the cloud.gov environment.
        # TODO:  Make this more robust in the case of extended outages.
        while True:
            logger.info('Attempting to connect to Redis...')
            redis = env.get_service(label='redis32')

            if redis is not None:
                logger.info('Successfully connected to Redis.')
                break

            logger.error('Could not connect to Redis, retrying...')
            # Pause between attempts; without a delay this loop spins at
            # full speed and floods the log while the service is down.
            time.sleep(1)

        # Construct the Redis instance URL based on the service information
        # returned.
        url = redis.get_url(host='hostname', password='******', port='port')
        return 'redis://{}'.format(url)
    else:
        logger.debug(
            'Not running in a cloud.gov space, attempting to connect locally.'
        )

    # Fall back to attempting to read whatever is set in the FEC_REDIS_URL
    # environment variable, otherwise a localhost connection.
    return env.get_credential('FEC_REDIS_URL', 'redis://localhost:6379/0')
def get_single_case(case_type, case_no): bucket = get_bucket() bucket_name = env.get_credential('bucket') with db.engine.connect() as conn: rs = conn.execute(SINGLE_CASE, case_type, case_no) row = rs.first() if row is not None: case_id = row['case_id'] sort1, sort2 = get_sort_fields(row['case_no']) case = { 'doc_id': '{0}_{1}'.format(case_type.lower(), row['case_no']), 'no': row['case_no'], 'name': row['name'], 'published_flg': row['published_flg'], 'sort1': sort1, 'sort2': sort2, } case['commission_votes'] = get_commission_votes(case_type, case_id) case['documents'] = get_documents(case_id, bucket, bucket_name) case['url'] = '/legal/{0}/{1}/'.format(get_full_name(case_type), row['case_no']) if case_type == 'AF': case = extend(case, get_af_specific_fields(case_id)) return case if case_type == 'MUR': case['mur_type'] = 'current' case['subjects'] = get_subjects(case_id) case['election_cycles'] = get_election_cycles(case_id) participants = get_participants(case_id) case['participants'] = list(participants.values()) case['respondents'] = get_sorted_respondents(case['participants']) case['dispositions'] = get_dispositions(case_id) case['open_date'], case['close_date'] = get_open_and_close_dates( case_id) return case else:
def get_advisory_opinions(from_ao_no):
    """Yield a document dict for each row returned by ``ALL_AOS``.

    ``from_ao_no`` is either ``None`` (start from 0-0) or a ``"year-serial"``
    string whose two integer parts are passed to the query.
    """
    bucket = get_bucket()
    bucket_name = env.get_credential('bucket')

    ao_names = get_ao_names()
    # Map each AO number to its integer components for sorting.
    components_by_no = {
        ao_no: tuple(int(part) for part in ao_no.split('-'))
        for ao_no in ao_names
    }
    citations = get_citations(ao_names)

    if from_ao_no is None:
        start_ao_year, start_ao_serial = 0, 0
    else:
        start_ao_year, start_ao_serial = [int(part) for part in from_ao_no.split('-')]

    with db.engine.connect() as conn:
        query_args = (start_ao_year, start_ao_serial, start_ao_year)
        for row in conn.execute(ALL_AOS, query_args):
            ao_id = row["ao_id"]
            ao_no = row["ao_no"]
            year, serial = components_by_no[ao_no]

            ao = {
                "no": ao_no,
                "name": row["name"],
                "summary": row["summary"],
                "request_date": row["req_date"],
                "issue_date": row["issue_date"],
                "is_pending": row["is_pending"],
                "ao_citations": citations[ao_no]["ao"],
                "aos_cited_by": citations[ao_no]["aos_cited_by"],
                "statutory_citations": citations[ao_no]["statutes"],
                "regulatory_citations": citations[ao_no]["regulations"],
                # Negated so that ascending sorts list the newest AO first.
                "sort1": -year,
                "sort2": -serial,
            }
            ao["documents"] = get_documents(ao_id, bucket, bucket_name)

            requestors, requestor_types, commenters, representatives = get_entities(ao_id)
            ao["requestor_names"] = requestors
            ao["requestor_types"] = requestor_types
            ao["commenter_names"] = commenters
            ao["representative_names"] = representatives

            yield ao
def handle_exception(exception):
    """Serve a cached copy of the request from S3, or raise a 404 ``ApiError``.

    The exception is wrapped in a ``ResponseException`` and logged. When
    ``is_retrievable_from_cache`` accepts the wrapped status and request
    path, the cached response is fetched from the ``cached-calls`` folder of
    the bucket and returned.

    Raises:
        exceptions.ApiError: with status 404 when the request is not
            cacheable or no cached copy could be retrieved.
    """
    wrapped = ResponseException(str(exception), ErrorCode.INTERNAL_ERROR, type(exception))

    logger.info(
        'An API error occurred with the status code of {status} ({exception}).'
        .format(status=wrapped.status, exception=wrapped.wrappedException))

    if is_retrievable_from_cache(wrapped.status, request.path):
        logger.info('Attempting to retrieve the cached request from S3...')

        # Retrieve the information needed to construct a URL for the S3 bucket
        # where the cached API responses live.
        formatted_url = utils.format_url(request.url)
        s3_bucket = utils.get_bucket()
        bucket_region = env.get_credential('region')
        cached_url = "http://s3-{0}.amazonaws.com/{1}/cached-calls/{2}".format(
            bucket_region, s3_bucket.name, formatted_url)

        # Attempt to retrieve the cached data from S3.
        cached_data = utils.get_cached_request(cached_url)

        # If the cached data was returned, we can return that to the client.
        if cached_data is not None:
            logger.info('Successfully retrieved cached request from S3.')
            return cached_data

        logger.error(
            'An error occurred while retrieving the cached file from S3.')

    # Not cacheable, or the cache lookup failed. The message used to be passed
    # through ``.format(request.url)`` although it has no placeholder; that
    # no-op is removed, so the text sent to clients is unchanged.
    raise exceptions.ApiError(
        'The requested URL could not be found.',
        status_code=http.client.NOT_FOUND)
def sqla_conn_string():
    """Return the ``SQLA_CONN`` credential, or the local ``cfdm_test`` URL.

    A notice is printed when the fallback is used.
    """
    configured = env.get_credential('SQLA_CONN')
    if configured:
        return configured

    print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`")
    return 'postgresql://:@/cfdm_test'


# app.debug = True
app.config.update({
    'SQLALCHEMY_DATABASE_URI': sqla_conn_string(),
    'APISPEC_FORMAT_RESPONSE': None,
    'SQLALCHEMY_TRACK_MODIFICATIONS': False,
    'SQLALCHEMY_POOL_SIZE': 50,
    'SQLALCHEMY_MAX_OVERFLOW': 50,
    'SQLALCHEMY_POOL_TIMEOUT': 120,
    # Any non-empty credential value enables the restriction.
    'SQLALCHEMY_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS': bool(
        env.get_credential('SQLA_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS', '')
    ),
    'SQLALCHEMY_FOLLOWER_TASKS': [
        'webservices.tasks.download.export_query',
    ],
    # One engine per non-blank entry of the comma-separated credential.
    'SQLALCHEMY_FOLLOWERS': [
        sa.create_engine(url)
        for url in (
            raw.strip()
            for raw in env.get_credential('SQLA_FOLLOWERS', '').split(',')
        )
        if url
    ],
})
# app.config['SQLALCHEMY_ECHO'] = True

# Modify app configuration and logging level for production
if not app.debug:
    app.logger.addHandler(logging.StreamHandler())
    app.logger.setLevel(logging.INFO)
from webservices.env import env
from flask import request
from webservices import utils

es = utils.get_elasticsearch_connection()

write_cred = env.get_credential('WRITE_AUTHORIZED_TOKENS', '')
# Drop blank entries: ''.split(',') yields [''], which previously made the
# empty string an authorized token whenever the credential was unset.
write_authorized_tokens = [
    token.strip() for token in write_cred.split(',') if token.strip()
]


# Write endpoint for legal documents: bulk-indexes the posted ``docs`` into
# the ``docs`` index when the caller's ``api_key`` is an authorized token.
class Legal(utils.Resource):
    def post(self, **kwargs):
        # get_json() returns None for a JSON ``null`` body, and the key may be
        # missing; both are treated as unauthorized instead of raising.
        data = request.get_json() or {}
        api_key = data.get('api_key')
        if api_key and api_key in write_authorized_tokens:
            es.bulk((es.index_op(doc, id=doc['doc_id']) for doc in data['docs']),
                    index='docs',
                    doc_type=data['doc_type'])
            return {'success': True}
        else:
            msg = {'success': False, 'message':
                   "Your API token has not been authorized to write data to this application."}
            return msg, 401
sqla_conn_string = 'postgresql://:@/cfdm_test' print(sqla_conn_string) return sqla_conn_string app = Flask(__name__) app.debug = True app.config['SQLALCHEMY_DATABASE_URI'] = sqla_conn_string() app.config['APISPEC_FORMAT_RESPONSE'] = None app.config['SQLALCHEMY_REPLICA_TASKS'] = [ 'webservices.tasks.download.export_query', ] app.config['SQLALCHEMY_FOLLOWERS'] = [ sa.create_engine(follower.strip()) for follower in env.get_credential('SQLA_FOLLOWERS', '').split(',') if follower.strip() ] # app.config['SQLALCHEMY_ECHO'] = True db.init_app(app) cors.CORS(app) class FlaskRestParser(FlaskParser): def handle_error(self, error): message = error.messages status_code = getattr(error, 'status_code', 422) raise exceptions.ApiError(message, status_code) parser = FlaskRestParser()
def redis_url():
    """Return the Redis connection URL.

    Built from the ``redis32`` service when ``env.get_service`` returns one;
    otherwise the ``FEC_REDIS_URL`` credential, defaulting to localhost.
    """
    service = env.get_service(label='redis32')
    if not service:
        return env.get_credential('FEC_REDIS_URL', 'redis://localhost:6379/0')

    location = service.get_url(host='hostname', password='******', port='port')
    return 'redis://{}'.format(location)
def get_s3_key(name):
    """Return a boto ``Key`` called ``name`` in the configured bucket.

    The region and bucket name come from the ``region`` and ``bucket``
    credentials; no access keys are passed explicitly.
    """
    region = env.get_credential('region')
    connection = boto.s3.connect_to_region(region)
    target_bucket = connection.get_bucket(env.get_credential('bucket'))
    return Key(bucket=target_bucket, name=name)
def get_app_name():
    """Return the value of the ``NEW_RELIC_APP_NAME`` credential."""
    app_name = env.get_credential('NEW_RELIC_APP_NAME')
    return app_name
def api_ui():
    """Render ``swagger-ui.html`` with the spec URL and ``PRODUCTION`` flag."""
    template_context = {
        'specs_url': url_for('docs.api_spec'),
        'PRODUCTION': env.get_credential('PRODUCTION'),
    }
    return render_template('swagger-ui.html', **template_context)
def get_bucket():
    """Return the S3 bucket named by the ``bucket`` credential.

    The boto3 session is created without explicit keys.
    """
    resource = boto3.Session().resource('s3')
    bucket_name = env.get_credential('bucket')
    return resource.Bucket(bucket_name)
logger = logging.getLogger(__name__)

# ``use_kwargs`` variant that reads arguments from the query string only.
use_kwargs = functools.partial(use_kwargs_original, locations=('query', ))


# Base class for the API resources defined in this project.
class Resource(six.with_metaclass(MethodResourceMeta, restful.Resource)):
    pass


API_KEY_ARG = fields.Str(
    required=True,
    missing='DEMO_KEY',
    description=docs.API_KEY_DESCRIPTION,
)
# Only when the PRODUCTION credential is set is ``api_key`` attached to
# every resource.
if env.get_credential('PRODUCTION'):
    Resource = use_kwargs({'api_key': API_KEY_ARG})(Resource)

# Key '9' maps to the dcdev URL; every other key falls back to the paper URL.
fec_url_map = defaultdict(
    lambda : 'http://docquery.fec.gov/paper/posted/{0}.fec',
    {'9': 'http://docquery.fec.gov/dcdev/posted/{0}.fec'},
)


def check_cap(kwargs, cap):
    """Raise a 422 ``ApiError`` unless ``per_page`` is set and within ``cap``.

    Nothing is checked when ``cap`` is falsy.
    """
    if not cap:
        return
    per_page = kwargs.get('per_page')
    if per_page and not per_page > cap:
        return
    raise exceptions.ApiError(
        'Parameter "per_page" must be between 1 and {}'.format(cap),
        status_code=422,
    )
BROKER_URL=redis_url(), ONCE_REDIS_URL=redis_url(), ONCE_DEFAULT_TIMEOUT=60 * 60, CELERY_IMPORTS=( 'webservices.tasks.refresh', 'webservices.tasks.download', ), CELERYBEAT_SCHEDULE={ 'refresh': { 'task': 'webservices.tasks.refresh.refresh', 'schedule': crontab(minute=0, hour=9), }, } ) client = Client(env.get_credential('SENTRY_DSN')) register_signal(client) register_logger_signal(client) context = {} @signals.task_prerun.connect def push_context(task_id, task, *args, **kwargs): context[task_id] = utils.get_app().app_context() context[task_id].push() @signals.task_postrun.connect def pop_context(task_id, task, *args, **kwargs): if task_id in context: context[task_id].pop()
@app.route('/')
@app.route('/v1/')
@docs.route('/developer/')
def api_ui_redirect():
    """Permanently redirect to the ``docs.api_ui`` endpoint."""
    target = url_for('docs.api_ui')
    return redirect(target, code=http.client.MOVED_PERMANENTLY)


@docs.route('/developers/')
def api_ui():
    """Render ``swagger-ui.html`` with the spec URL and ``PRODUCTION`` flag."""
    template_context = {
        'specs_url': url_for('docs.api_spec'),
        'PRODUCTION': os.getenv('PRODUCTION'),
    }
    return render_template('swagger-ui.html', **template_context)


app.register_blueprint(docs)


def initialize_newrelic():
    """Start the New Relic agent when ``NEW_RELIC_LICENSE_KEY`` is set."""
    license_key = env.get_credential('NEW_RELIC_LICENSE_KEY')
    if not license_key:
        return

    # Imported lazily so the agent is only loaded when a key is configured.
    import newrelic.agent

    newrelic.agent.global_settings().license_key = license_key
    newrelic.agent.initialize()


initialize_newrelic()

# Sentry is attached only when a DSN is configured.
if env.get_credential('SENTRY_DSN'):
    Sentry(app, dsn=env.get_credential('SENTRY_DSN'))
if not sqla_conn_string: print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`") sqla_conn_string = 'postgresql://:@/cfdm_test' return sqla_conn_string # app.debug = True app.config['SQLALCHEMY_DATABASE_URI'] = sqla_conn_string() app.config['APISPEC_FORMAT_RESPONSE'] = None app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SQLALCHEMY_POOL_SIZE'] = 50 app.config['SQLALCHEMY_MAX_OVERFLOW'] = 50 app.config['SQLALCHEMY_POOL_TIMEOUT'] = 120 app.config['SQLALCHEMY_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS'] = bool( env.get_credential('SQLA_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS', '')) app.config['SQLALCHEMY_FOLLOWER_TASKS'] = [ 'webservices.tasks.download.export_query', ] app.config['SQLALCHEMY_FOLLOWERS'] = [ sa.create_engine(follower.strip()) for follower in utils.split_env_var( env.get_credential('SQLA_FOLLOWERS', '')) if follower.strip() ] app.config['SQLALCHEMY_ROUTE_SCHEDULE_A'] = bool( env.get_credential('SQLA_ROUTE_SCHEDULE_A', '')) app.config['PROPAGATE_EXCEPTIONS'] = True # app.config['SQLALCHEMY_ECHO'] = True # Modify app configuration and logging level for production if not app.debug:
sqla_conn_string = 'postgresql://:@/cfdm_test' print(sqla_conn_string) return sqla_conn_string app = Flask(__name__) app.debug = True app.config['SQLALCHEMY_DATABASE_URI'] = sqla_conn_string() app.config['APISPEC_FORMAT_RESPONSE'] = None app.config['SQLALCHEMY_REPLICA_TASKS'] = [ 'webservices.tasks.download.export_query', ] app.config['SQLALCHEMY_FOLLOWERS'] = [ sa.create_engine(follower.strip()) for follower in env.get_credential('SQLA_FOLLOWERS', '').split(',') if follower.strip() ] # app.config['SQLALCHEMY_ECHO'] = True db.init_app(app) cors.CORS(app) class FlaskRestParser(FlaskParser): def handle_error(self, error): message = error.messages status_code = getattr(error, 'status_code', 422) raise exceptions.ApiError(message, status_code) parser = FlaskRestParser() app.config['APISPEC_WEBARGS_PARSER'] = parser
import elasticsearch
# ``elasticsearch.helpers`` is a submodule and must be imported explicitly
# before ``elasticsearch.helpers.bulk`` can be used.
import elasticsearch.helpers

from webservices.env import env
from flask import request
from webservices import utils

es = utils.get_elasticsearch_connection()

write_cred = env.get_credential('WRITE_AUTHORIZED_TOKENS', '')
# Drop blank entries: ''.split(',') yields [''], which previously made the
# empty string an authorized token whenever the credential was unset.
write_authorized_tokens = [
    token.strip() for token in write_cred.split(',') if token.strip()
]


# Write endpoint for legal documents: bulk-indexes the posted ``docs`` into
# the ``docs`` index when the caller's ``api_key`` is an authorized token.
class Legal(utils.Resource):
    def post(self, **kwargs):
        # get_json() returns None for a JSON ``null`` body, and the key may be
        # missing; both are treated as unauthorized instead of raising.
        data = request.get_json() or {}
        api_key = data.get('api_key')
        if api_key and api_key in write_authorized_tokens:
            # Bulk action metadata uses underscore-prefixed keys. A plain
            # ``id`` is not recognized, so documents were indexed under
            # auto-generated ids rather than their ``doc_id``.
            actions = (
                dict(_op_type='index', _source=doc, _id=doc['doc_id'])
                for doc in data['docs']
            )
            elasticsearch.helpers.bulk(
                es,
                actions,
                index='docs',
                doc_type=data['doc_type'])
            return {'success': True}
        else:
            msg = {
                'success': False,
                'message':
                "Your API token has not been authorized to write data to this application."
            }
            return msg, 401
logger = logging.getLogger(__name__)

# ``use_kwargs`` variant that reads arguments from the query string only.
use_kwargs = functools.partial(use_kwargs_original, locations=('query', ))


# Base class for the API resources defined in this project.
class Resource(six.with_metaclass(MethodResourceMeta, restful.Resource)):
    pass


API_KEY_ARG = fields.Str(
    required=True,
    missing='DEMO_KEY',
    description=docs.API_KEY_DESCRIPTION,
)
# Only when the PRODUCTION credential is set is ``api_key`` attached to
# every resource.
if env.get_credential('PRODUCTION'):
    Resource = use_kwargs({'api_key': API_KEY_ARG})(Resource)

# Key '9' maps to the dcdev URL; every other key falls back to the paper URL.
fec_url_map = defaultdict(
    lambda: 'http://docquery.fec.gov/paper/posted/{0}.fec',
    {'9': 'http://docquery.fec.gov/dcdev/posted/{0}.fec'},
)


def check_cap(kwargs, cap):
    """Raise a 422 ``ApiError`` unless ``per_page`` is set and within ``cap``.

    Nothing is checked when ``cap`` is falsy.
    """
    if not cap:
        return
    per_page = kwargs.get('per_page')
    if per_page and not per_page > cap:
        return
    raise exceptions.ApiError(
        'Parameter "per_page" must be between 1 and {}'.format(cap),
        status_code=422,
    )
def sqla_conn_string(): sqla_conn_string = env.get_credential('SQLA_CONN') if not sqla_conn_string: print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`") sqla_conn_string = 'postgresql://:@/cfdm_test' return sqla_conn_string # app.debug = True app.config['SQLALCHEMY_DATABASE_URI'] = sqla_conn_string() app.config['APISPEC_FORMAT_RESPONSE'] = None app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SQLALCHEMY_POOL_SIZE'] = 50 app.config['SQLALCHEMY_MAX_OVERFLOW'] = 50 app.config['SQLALCHEMY_POOL_TIMEOUT'] = 120 app.config['SQLALCHEMY_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS'] = bool( env.get_credential('SQLA_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS', '') ) app.config['SQLALCHEMY_FOLLOWER_TASKS'] = [ 'webservices.tasks.download.export_query', ] app.config['SQLALCHEMY_FOLLOWERS'] = [ sa.create_engine(follower.strip()) for follower in env.get_credential('SQLA_FOLLOWERS', '').split(',') if follower.strip() ] app.config['PROPAGATE_EXCEPTIONS'] = True # app.config['SQLALCHEMY_ECHO'] = True # Modify app configuration and logging level for production if not app.debug:
def redis_url():
    """Return the Redis connection URL.

    Built from the ``redis28`` service when ``env.get_service`` returns one;
    otherwise the ``FEC_REDIS_URL`` credential, defaulting to localhost.
    """
    service = env.get_service(label='redis28')
    if not service:
        return env.get_credential('FEC_REDIS_URL', 'redis://localhost:6379/0')

    location = service.get_url(host='hostname', password='******', port='port')
    return 'redis://{}'.format(location)
sqla_conn_string = env.get_credential('SQLA_CONN') if not sqla_conn_string: print("Environment variable SQLA_CONN is empty; running against " + "local `cfdm_test`") sqla_conn_string = 'postgresql://:@/cfdm_test' return sqla_conn_string # app.debug = True app.config['SQLALCHEMY_DATABASE_URI'] = sqla_conn_string() app.config['APISPEC_FORMAT_RESPONSE'] = None app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['SQLALCHEMY_POOL_SIZE'] = 50 app.config['SQLALCHEMY_MAX_OVERFLOW'] = 50 app.config['SQLALCHEMY_POOL_TIMEOUT'] = 120 app.config['SQLALCHEMY_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS'] = bool( env.get_credential('SQLA_RESTRICT_FOLLOWER_TRAFFIC_TO_TASKS', '') ) app.config['SQLALCHEMY_FOLLOWER_TASKS'] = [ 'webservices.tasks.download.export_query', ] app.config['SQLALCHEMY_FOLLOWERS'] = [ sa.create_engine(follower.strip()) for follower in utils.split_env_var(env.get_credential('SQLA_FOLLOWERS', '')) if follower.strip() ] app.config['SQLALCHEMY_ROUTE_SCHEDULE_A'] = bool( env.get_credential('SQLA_ROUTE_SCHEDULE_A', '') ) app.config['PROPAGATE_EXCEPTIONS'] = True # app.config['SQLALCHEMY_ECHO'] = True