def test_deletes_http_cache(http_pretty, tmpdir_setup):
    assert len(http_client().cache.responses) == 0

    httpretty.register_uri(httpretty.GET, 'http://example.com')
    http_client().get('http://example.com')
    assert len(http_client().cache.responses) == 1

    CliRunner().invoke(clear)
    assert len(http_client().cache.responses) == 0
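The assertions above assume http_client() hands back a cached session whose cache exposes a responses mapping. A minimal sketch of such a client, assuming the requests-cache library (the cache name and backend are illustrative, not taken from the source):

import requests_cache

_session = None


def http_client():
    # Hypothetical: a lazily created module-level CachedSession; requests-cache
    # exposes cached entries through session.cache.responses and supports
    # session.cache.clear(), matching the test above and the clear command below.
    global _session
    if _session is None:
        _session = requests_cache.CachedSession('http_cache', backend='memory')
    return _session
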
def fetch_notice_json(cfr_title,
                      cfr_part,
                      only_final=False,
                      max_effective_date=None):
    """Search through all articles associated with this part. Right now,
    limited to 1000; could use paging to fix this in the future."""
    params = {
        "conditions[cfr][title]": cfr_title,
        "conditions[cfr][part]": cfr_part,
        "per_page": 1000,
        "order": "oldest",
        "fields[]": FULL_NOTICE_FIELDS
    }
    if only_final:
        params["conditions[type][]"] = 'RULE'
    if max_effective_date:
        params["conditions[effective_date][lte]"] = max_effective_date
    url = API_BASE + "articles"
    logger.info("Fetching notices - URL: %s Params: %r", url, params)
    response = http_client().get(url, params=params).json()
    logger.debug("Fetching notices response - %r", response)
    if 'results' in response:
        return response['results']
    else:
        return []
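A hedged usage sketch for the function above; the CFR title and part numbers and the printed field names are placeholders, not values from the source:

final_rules = fetch_notice_json(12, 1026, only_final=True,
                                max_effective_date='2016-01-01')
for notice in final_rules:
    # 'document_number' and 'effective_on' are assumed to be among the
    # FULL_NOTICE_FIELDS requested above.
    print(notice.get('document_number'), notice.get('effective_on'))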
def meta_data(document_number, fields=None):
    """Return the requested meta data for a specific Federal Register
    document. Accounts for a bad document number by throwing an exception"""
    url = "{}articles/{}".format(API_BASE, document_number)
    params = {}     # default fields are generally good
    if fields:
        params["fields[]"] = fields
    response = http_client().get(url, params=params)
    response.raise_for_status()
    return response.json()
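Because meta_data calls raise_for_status(), a bad document number surfaces as a requests HTTPError; an illustrative call with placeholder arguments:

try:
    # Placeholder document number and field names.
    meta = meta_data('2016-12345', fields=['document_number', 'html_url'])
except requests.exceptions.HTTPError:
    meta = None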
Example #4
def check_url(url):
    """Verify that content exists at a given URL"""
    client = http_client()
    response = client.head(url)

    if response.status_code == requests.codes.not_implemented:
        response = client.get(url)

    if response.status_code == requests.codes.ok:
        return url
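Because check_url returns the URL on success and falls through to None otherwise, it composes naturally with filtering; a small sketch with placeholder URLs:

candidates = ['https://www.federalregister.gov/', 'https://example.com/missing']
reachable = [u for u in candidates if check_url(u)]
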
    def _get(self, suffix):
        """Actually make the GET request. Assume the result is JSON. Right
        now, there is no error handling"""
        if self.base_url.startswith('http'):    # API
            json_str = http_client().get(self.base_url + suffix).text
        else:   # file system
            if os.path.isdir(self.base_url + suffix):
                suffix = suffix + "/index.html"
            with open(self.base_url + suffix) as f:
                json_str = f.read()
        return json.loads(json_str, object_hook=node_decode_hook)
Example #8
def clear(path):
    """Delete intermediate and cache data. Only PATH arguments are cleared
    unless no arguments are present, then everything is wiped.

    \b
    $ eregs clear                   # clears everything
    $ eregs clear diff/27 trees     # deletes all cached trees and all CFR
                                    # title 27 diffs
    """
    if path:
        paths = [os.path.join(settings.EREGS_INDEX_ROOT, p) for p in path]

        # Deleting cascades
        DependencyNode.objects.filter(pk__in=paths).delete()
        for path in paths:
            DependencyNode.objects.filter(pk__startswith=path).delete()
    else:
        DependencyNode.objects.all().delete()

    http_client().cache.clear()
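The command can be exercised the same way the test_deletes_http_cache example above does, through click's test runner; the PATH arguments below mirror the docstring's CLI examples:

from click.testing import CliRunner

runner = CliRunner()
runner.invoke(clear)                        # clears everything
runner.invoke(clear, ['diff/27', 'trees'])  # only title 27 diffs and all trees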
Example #9
def docs(docket_id, filter_fn=None):
    """Fetch RegsGovDocs representing documents within this docket. Grab all
    types except public submissions. Use `filter_fn` to limit the results"""
    # Use a list for consistent ordering, which is useful for caching
    params = [('api_key', REGS_GOV_KEY), ('dktid', docket_id), ('rpp', 1000),
              ('sb', 'docId'), ('so', 'ASC'), ('dct', 'N+PR+FR+O+SR')]
    results = http_client().get(REGS_GOV_DOC_API, params=params).json()
    if results.get('error'):
        logger.warning("Error retrieving data from regs.gov: %s",
                       results['error'].get('message'))
    for doc_dict in results.get('documents', []):
        if filter_fn is None or filter_fn(doc_dict):
            yield RegsGovDoc(doc_dict['documentId'], doc_dict['title'])
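An illustrative iteration over the generator above; the docket id and the filter are placeholders, and the 'documentType' key is an assumption about the raw regs.gov payload:

def _rules_only(doc_dict):
    # Hypothetical filter; key name assumed, not confirmed by the source.
    return doc_dict.get('documentType') == 'Rule'


for doc in docs('CFPB-2016-0025', filter_fn=_rules_only):
    print(doc)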
Example #11
def notice_xmls_for_url(notice_url):
    """Find, preprocess, and return the XML(s) associated with a particular FR
    notice url"""
    local_notices = local_copies(notice_url)
    if local_notices:
        logger.info("using local xml for %s", notice_url)
        for local_notice_file in local_notices:
            with open(local_notice_file, 'rb') as f:
                yield NoticeXML(f.read(), local_notice_file).preprocess()
    else:
        # ignore initial slash
        path_parts = urlparse(notice_url).path[1:].split('/')
        client = http_client()
        first_try_url = settings.XML_REPO_PREFIX + '/'.join(path_parts)
        logger.info('trying to fetch notice xml from %s', first_try_url)
        response = client.get(first_try_url)
        if response.status_code != requests.codes.ok:
            logger.info('failed. fetching from %s', notice_url)
            response = client.get(notice_url)
        yield NoticeXML(response.content, notice_url).preprocess()
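A hedged usage sketch; the URL is a placeholder for a Federal Register full-text XML location:

url = 'https://www.gpo.gov/fdsys/pkg/FR-2016-01-01/xml/2016-12345.xml'
for notice_xml in notice_xmls_for_url(url):
    print(notice_xml)   # each NoticeXML has already been preprocessed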
Example #12
def notice_xmls_for_url(doc_num, notice_url):
    """Find, preprocess, and return the XML(s) associated with a particular FR
    notice url"""
    local_notices = local_copies(notice_url)
    if local_notices:
        logger.info("using local xml for %s", notice_url)
        for local_notice_file in local_notices:
            with open(local_notice_file, "rb") as f:
                yield NoticeXML(f.read(), local_notice_file).preprocess()
    else:
        # ignore initial slash
        path_parts = urlparse(notice_url).path[1:].split("/")
        client = http_client()
        first_try_url = settings.XML_REPO_PREFIX + "/".join(path_parts)
        logger.info("trying to fetch notice xml from %s", first_try_url)
        response = client.get(first_try_url)
        if response.status_code != requests.codes.ok:
            logger.info("failed. fetching from %s", notice_url)
            response = client.get(notice_url)
        yield NoticeXML(response.content, notice_url).preprocess()
Example #14
    def find_part_xml(self, part):
        """Pull the XML for an annual edition, first checking locally"""
        logger.info("Find Part xml for %s CFR %s", self.title, part)
        url = CFR_PART_URL.format(year=self.year, title=self.title,
                                  volume=self.vol_num, part=part)
        filename = url.split('/')[-1]
        for xml_path in settings.LOCAL_XML_PATHS:
            xml_path = os.path.join(xml_path, 'annual', filename)
            logger.debug("Checking locally for file %s", xml_path)
            if os.path.isfile(xml_path):
                with open(xml_path, 'rb') as f:
                    return XMLWrapper(f.read(), xml_path)

        client = http_client()
        first_try_url = settings.XML_REPO_PREFIX + 'annual/' + filename
        logger.info('trying to fetch annual edition from %s', first_try_url)
        response = client.get(first_try_url)
        if response.status_code != requests.codes.ok:
            logger.info('failed. fetching from %s', url)
            response = client.get(url)
        if response.status_code == requests.codes.ok:
            return XMLWrapper(response.content, url)
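An illustrative call; `volume` stands in for an instance of the enclosing class, which must provide the title, year and vol_num attributes referenced in the method body:

part_xml = volume.find_part_xml(1026)   # 1026 is a placeholder part number
if part_xml is None:
    logger.warning("No annual edition XML found for part %s", 1026)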
Example #15
    def response(self):
        logger.debug("GET %s", self.url)
        return http_client().get(self.url, stream=True)