def monolith():
    """
    Generate document-download work items and push them onto the work queue.

    Queries the regulations.gov API for the total number of available
    documents, then builds batches of up to 1000 paginated URLs. Each batch
    is wrapped in a job (random 16-char id, "docs" type, url list) and sent
    to the Redis-backed work queue.

    Kept as one function on purpose: the original author wanted a single
    early-exit point (`return 0`) instead of calling sys.exit(), so the
    error path cannot affect any caller of this script.

    :return: 0 if the initial API request fails, otherwise None.
    """
    url_base = "https://api.data.gov/regulations/v3/documents.json?rpp=1000"
    r = redis_manager.RedisManager(redis.Redis())
    regulations_key = config.read_value('key')
    current_page = 0

    # Guard clause: without an API key there is nothing to do.
    if regulations_key == "":
        print("No API Key!")
        return

    # Gets number of documents available to download.
    # Catch only the failures this request can realistically produce:
    # transport errors, a non-JSON body, or a response missing the count.
    try:
        record_count = requests.get(
            "https://api.data.gov/regulations/v3/documents.json?api_key="
            + regulations_key + "&countsOnly=1").json()["totalNumRecords"]
    except (requests.exceptions.RequestException, ValueError, KeyError):
        logger.error('Error occurred with API request')
        print("Error occurred with docs_work_gen regulations API request.")
        return 0

    # Gets the max page we'll go to; each page is 1000 documents.
    max_page_hit = record_count // 1000

    # Generate lists of URLs, sending out a job every 1000 URLs.
    # Stops and sends whatever is left once the max page limit is hit.
    while current_page < max_page_hit:
        url_list = []
        for _ in range(1000):
            current_page += 1
            # NOTE(review): offsets start at po=1000, so the first 1000
            # records are never requested — presumably handled elsewhere;
            # preserved as-is to avoid changing queue contents. TODO confirm.
            url_full = url_base + "&po=" + str(current_page * 1000)
            url_list.append(url_full)
            if current_page == max_page_hit:
                break

        # Job format: [random 16-char job id, job type, list of URLs].
        docs_work = [
            ''.join(
                random.choices(string.ascii_letters + string.digits, k=16)),
            "docs",
            url_list
        ]
        r.add_to_queue(endpoints.generate_json(docs_work))
import requests

import mirrulations_core.config as config
from mirrulations.mirrulations_logging import logger

key = config.read_value('key')


def call(url):
    """
    Perform a GET request against regulations.gov and validate the response.

    Status handling:
      * 3xx  -> TemporaryException (caller may retry the call)
      * 429  -> ApiCountZeroException (API quota exhausted; wait an hour)
      * 4xx/5xx (other than 429) -> PermanentException (API problem)
      * anything else -> the response is returned as-is

    :param url: the url that will be used to make the API call
    :return: the requests response containing the documents' JSON data
    """
    logger.warning('Making API call...')
    response = requests.get(url)
    code = response.status_code

    # Classify the status code first, then log + raise in one place.
    if 300 <= code < 400:
        failure = TemporaryException
    elif code == 429:
        failure = ApiCountZeroException
    elif 400 <= code < 600:
        failure = PermanentException
    else:
        failure = None

    if failure is not None:
        logger.warning('API call failed')
        raise failure

    logger.warning('API call successfully made')
    return response
from mirrulations.api_call_management import *
import json
import logging
import mirrulations_core.config as config

# Module-level accumulator for per-job results; reset at the start of
# every documents_processor run via the `global` statement below.
workfiles = []
version = "v1.3"

key = config.read_value('key')
client_id = config.read_value('client_id')

# Log to a dedicated file; `d` supplies the clientip/user fields the
# format string expects (passed through `extra=` on each log call).
FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
logging.basicConfig(filename='documents_processor.log', format=FORMAT)
d = {'clientip': '192.168.0.1', 'user': client_id}
logger = logging.getLogger('tcpserver')


def documents_processor(urls, job_id, client_id):
    """
    Call each url in the list, process the results of the calls and then
    form a json file to send back the results
    :param urls: list of urls that have to be called
    :param job_id: the id of the job that is being worked on currently
    :param client_id: id of the client calling this function
    :return result: the json to be returned to the server after each call
        is processed
    """
    # Reset the shared accumulator so results from a previous job
    # cannot leak into this one.
    global workfiles
    workfiles = []
    logger.debug('Call Successful: %s',
                 'documents_processor: Processing documents', extra=d)
    logger.info('Processing documents into JSON...')
    # NOTE(review): the function body appears truncated at this point in
    # this view — the docstring promises a returned result but no further
    # statements are visible here; confirm the remainder in the full file.
import mirrulations.document_processor as doc
import mirrulations.documents_processor as docs
import mirrulations.api_call_management as man
import requests
import json
import time
import logging
import shutil
import tempfile
from pathlib import Path
import mirrulations_core.config as config

# These variables are specific to the current implementation
version = "v1.3"

# Work-server address, assembled from the configured ip/port values.
ip = config.read_value('ip')
port = config.read_value('port')
serverurl = "http://" + ip + ":" + port

# regulations.gov API key and this client's identifier.
key = config.read_value('key')
client_id = config.read_value('client_id')

# Client-side log file; `d` supplies the clientip/user fields the format
# string expects (passed through `extra=` on log calls elsewhere).
FORMAT = '%(asctime)-15s %(clientip)s %(user)-8s %(message)s'
logging.basicConfig(filename='client.log', format=FORMAT)
d = {'clientip': '192.168.0.1', 'user': client_id}
logger = logging.getLogger('tcpserver')

# Healthchecks.io ping endpoint used to report that this client is alive.
client_health_url = "https://hc-ping.com/457a1034-83d4-4a62-8b69-c71060db3a08"