def get_endpoint_from_cityname(self, userinput: str, mirror: bool) -> Tuple[str, str]:
    """Resolve a (partial) city name to a ``(system id, body id)`` pair.

    Every entry of ``self.load_index()`` whose name contains ``userinput``
    (case-insensitive) is a candidate. If several candidates are found, a
    single case-insensitive exact name match wins.

    :param userinput: The (partial) name to look for in the index.
    :param mirror: If true, the mirror's body id is returned instead of the
        original body id.
    :return: The system id and the selected body id.
    :raises RuntimeError: If nothing matches or if the match is ambiguous.
    """
    needle = userinput.casefold()
    matching = []  # type: List[Tuple[str, str, str]]
    for name, original_id, mirror_id in self.load_index():
        if needle not in name.casefold():
            continue
        # The oparl mirror doesn't give us the system id we need
        system_id = requests_get(original_id).json()["system"]
        endpoint_id = mirror_id if mirror else original_id
        matching.append((name, system_id, endpoint_id))

    if not matching:
        raise RuntimeError(
            f"Could not find anything for '{userinput}'. "
            f"Please check that it is in the list under {settings.OPARL_INDEX} or provide the body id."
        )

    if len(matching) > 1:
        exact_matches = [entry for entry in matching if entry[0].casefold() == needle]
        if len(exact_matches) != 1:
            # Show all candidates so the user can pick a body url by hand
            logger.warning(f"Found those entries: {json.dumps(matching, indent=4)}")
            raise RuntimeError(
                f"There are {len(matching)} matches and {len(exact_matches)} exact matches for '{userinput}' and "
                "I can't decide which one to use. Please provide a body url yourself."
            )
        matching = exact_matches

    _, system_id, endpoint_id = matching[0]
    return system_id, endpoint_id
def get_endpoint_from_cityname(self, userinput: str, mirror: bool) -> Tuple[str, str]:
    """Resolve a (partial) city name to a ``(system id, body id)`` pair.

    Every entry of ``self.load_index()`` whose name contains ``userinput``
    (case-insensitive) is a candidate. If several candidates are found, a
    single case-insensitive exact name match wins.

    :param userinput: The (partial) name to look for in the index.
    :param mirror: If true, the mirror's body id is returned instead of the
        original body id.
    :return: The system id and the selected body id.
    :raises RuntimeError: If nothing matches or if the match is ambiguous.
    """
    # Loop invariant, so only compute it once
    needle = userinput.casefold()
    matching = []  # type: List[Tuple[str, str, str]]
    for (name, original_id, mirror_id) in self.load_index():
        if needle in name.casefold():
            # The oparl mirror doesn't give us the system id we need
            response = requests_get(original_id)
            system_id = response.json()["system"]
            if mirror:
                matching.append((name, system_id, mirror_id))
            else:
                matching.append((name, system_id, original_id))

    if not matching:
        raise RuntimeError(f"Could not find anything for '{userinput}'")

    if len(matching) > 1:
        exact_matches = [i for i in matching if i[0].casefold() == needle]
        if len(exact_matches) == 1:
            matching = exact_matches
        else:
            # Show all candidates so the user can pick a url by hand
            logger.warning(f"Found those entries: {json.dumps(matching, indent=4)}")
            raise RuntimeError(
                f"There are {len(matching)} matches and {len(exact_matches)} exact matches for '{userinput}' "
                "and I can't decide which one to use. Please provide a url yourself."
            )

    return matching[0][1:3]
def file_serve_proxy(request: HttpRequest, original_file_id: int) -> StreamingHttpResponse:
    """
    Util to proxy back to the original RIS in case we don't want to download all the files.

    The upstream response is streamed through with its status code and, if
    the upstream sent one, its Content-Type, so that the client does not get
    the framework's default content type for e.g. a pdf.

    :param request: The incoming request (unused).
    :param original_file_id: Id of the file, which is also filled into
        ``settings.PROXY_ONLY_TEMPLATE`` to build the upstream url.
    :return: A streaming response passing the upstream body through.
    """
    # Ensure that the file is not deleted in the database
    get_object_or_404(File, id=original_file_id)

    url = settings.PROXY_ONLY_TEMPLATE.format(original_file_id)
    response = requests_get(url, stream=True)
    return StreamingHttpResponse(
        response.iter_content(chunk_size=None),
        status=response.status_code,
        # None makes the response fall back to the default content type
        content_type=response.headers.get("Content-Type"),
    )
def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
    """Fetch ``url`` and return the decoded json.

    A warning is logged if the returned object has an ``id`` that differs
    from the requested url.

    :param url: The url to load.
    :param query: Optional query parameters, passed as ``params`` to
        ``requests_get``.
    :return: The decoded json, or an empty dict if the response decoded to
        ``None``.
    """
    logger.debug("Loader is loading {}".format(url))
    if query is None:
        query = dict()
    response = requests_get(url, params=query)
    data = response.json()
    if data is None:
        # json() can actually return None (a literal `null` body), which
        # would make the `in` check below raise a TypeError
        data = dict()
    if "id" in data and data["id"] != url:
        logger.warning(
            "Mismatch between url and id. url: {} id: {}".format(url, data["id"])
        )
    return data
def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
    """Fetch ``url`` and return the decoded json.

    A warning is logged if the returned object has an ``id`` that differs
    from the requested url.

    :param url: The url to load.
    :param query: Optional query parameters, passed as ``params`` to
        ``requests_get``.
    :return: The decoded json, with ``None`` replaced by an empty dict.
    """
    logger.debug(f"Loader is loading {url}")
    params = {} if query is None else query
    data = requests_get(url, params=params).json()
    # json() can actually return None
    if data is None:
        data = {}
    id_mismatch = "id" in data and data["id"] != url
    if id_mismatch:
        logger.warning(f"Mismatch between url and id. url: {url} id: {data['id']}")
    return data
def load(self, url: str, query: Optional[dict] = None) -> JSON:
    """Fetch ``url`` and return the decoded json, working around CC e-gov quirks.

    * A response with status 500 is retried exactly once.
    * If the body is not valid json, the unescaped control characters
      (U+0000 through U+001F except ``\\n``) are escaped and decoding is
      tried again.

    The decoded object is passed to ``self.visit`` before it is returned.

    :param url: The url to load.
    :param query: Optional query parameters, passed as ``params`` to
        ``requests_get``.
    :return: The decoded json, with ``None`` replaced by an empty dict.
    """
    logger.debug(f"Loader is loading {url}")
    params = dict() if query is None else query

    try:
        response = requests_get(url, params=params)
    except HTTPError as e:
        if e.response.status_code != 500:
            raise
        logger.error(f"Got an 500 for a CC e-gov request, retrying: {e}")
        response = requests_get(url, params=params)

    text = response.text
    try:
        data = json.loads(text)
    except JSONDecodeError:
        logger.error(
            f"The server returned invalid json. This is a bug in the OParl implementation: {url}"
        )
        # Hack based on the std json code to load broken json where the control characters
        # (U+0000 through U+001F except \n) weren't properly escaped
        control_chars = re.compile(r"[\x00-\x09\x0B-\x1f]")
        escapes = {chr(i): "\\u{0:04x}".format(i) for i in range(0x20)}
        text = control_chars.sub(lambda match: escapes[match.group(0)], text)
        data = json.loads(text)

    # The decoded json can actually be None
    if data is None:
        data = dict()

    if "id" in data and data["id"] != url:
        logger.warning(f"Mismatch between url and id. url: {url} id: {data['id']}")

    self.visit(data)
    return data
def get_loader_from_system(entrypoint: str) -> BaseLoader:
    """Fetch the system object at ``entrypoint`` and pick the matching loader.

    The vendor is detected through the ``contactName`` and ``vendor`` fields
    of the system object. Systems that match no known vendor get the plain
    ``BaseLoader``.

    :param entrypoint: The url of the system object.
    :return: A loader constructed with the fetched system object.
    """
    system = requests_get(entrypoint).json()

    if system.get("contactName") == "STERNBERG Software GmbH & Co. KG":
        logger.info("Using Sternberg patches")
        return SternbergLoader(system)

    if system.get("vendor") in ("http://cc-egov.de/", "https://www.cc-egov.de"):
        logger.info("Using CC e-gov patches")
        return CCEgovLoader(system)

    logger.info("Using no vendor specific patches")
    return BaseLoader(system)
def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
    """Fetch ``url`` and return the decoded json, working around Somacos quirks.

    The query parameters are appended to the url verbatim instead of being
    passed as ``params``, and a response with status 500 is retried exactly
    once. A warning is logged if the returned object has an ``id`` that
    differs from the requested url (including the appended query).

    :param url: The url to load.
    :param query: Optional query parameters; appended unencoded.
    :return: The decoded json, or an empty dict if the response decoded to
        ``None``.
    """
    if query:
        # Somacos doesn't like encoded urls
        url = url + "?" + "&".join(key + "=" + value for key, value in query.items())
    logger.debug("Loader is loading {}".format(url))
    try:
        response = requests_get(url)
    except HTTPError as e:
        if e.response.status_code == 500:
            logger.error(f"Got an 500 for a Somacos request, retrying: {e}")
            response = requests_get(url)
        else:
            raise
    data = response.json()
    if data is None:
        # json() can actually return None (a literal `null` body), which
        # would make the `in` check below raise a TypeError
        data = dict()
    if "id" in data and data["id"] != url:
        logger.warning(
            "Mismatch between url and id. url: {} id: {}".format(url, data["id"])
        )
    return data
def get_loader_from_system(entrypoint: str) -> BaseLoader:
    """Fetch the system object at ``entrypoint`` and pick the matching loader.

    The vendor is detected through the ``contactName``, ``vendor`` and
    ``product`` fields of the system object, checked in the order Sternberg,
    CC e-gov, Somacos. Systems that match no known vendor get the plain
    ``BaseLoader``.

    :param entrypoint: The url of the system object.
    :return: A loader constructed with the fetched system object.
    """
    response = requests_get(entrypoint)
    system = response.json()
    vendor = system.get("vendor")

    is_sternberg = system.get("contactName") == "STERNBERG Software GmbH & Co. KG"
    if is_sternberg:
        logger.info("Using Sternberg patches")
        return SternbergLoader(system)

    is_cc_egov = vendor == "http://cc-egov.de/" or vendor == "https://www.cc-egov.de"
    if is_cc_egov:
        logger.info("Using CC e-gov patches")
        return CCEgovLoader(system)

    is_somacos = (
        vendor == "http://www.somacos.de"
        or system.get("product")
        == "Sitzungsmanagementsystem Session Copyright SOMACOS GmbH & Co. KG"
    )
    if is_somacos:
        logger.info("Using Somacos patches ")
        return SomacosLoader(system)

    logger.info("Using no vendor specific patches")
    return BaseLoader(system)
def load_index(self) -> List[Tuple[str, str, str]]:
    """Loads the list of known endpoints from the oparl mirror if it has not been loaded yet.

    Starting at ``settings.OPARL_INDEX``, all pages are followed through
    their ``next`` link. Bodies without an ``oparl-mirror:originalId`` are
    skipped.

    :return: ``self.index``, a list of ``(name, original id, mirror id)``
        tuples, where the name falls back to the original id if the body
        has no (or an empty) name.
    """
    if self.index:
        return self.index

    # Collect into a local list first: if a request fails halfway through the
    # pagination, self.index stays empty and the next call starts over
    # instead of returning a partial index as if it was complete
    loaded = []
    next_page = settings.OPARL_INDEX
    while next_page:
        response = requests_get(next_page)
        data = response.json()
        next_page = data["links"].get("next")
        for body in data["data"]:
            if "oparl-mirror:originalId" not in body:
                continue
            original_id = body["oparl-mirror:originalId"]
            loaded.append((body.get("name") or original_id, original_id, body["id"]))

    # Extend in place so that existing references to the list stay valid
    self.index.extend(loaded)
    return self.index
def get_endpoint_from_body_url(self, userinput: str) -> Tuple[str, str]:
    """Fetch the body at ``userinput`` and return its system id and body id.

    A warning is logged if the id the body reports for itself differs from
    the url it was fetched from.

    :param userinput: The url of an oparl body.
    :return: The ``system`` and the ``id`` field of the body.
    :raises RuntimeError: If the ``type`` of the fetched object is not an
        OParl 1.0 or 1.1 body.
    """
    # We can't use the resolver here as we don't know the system url yet, which the resolver needs for determining
    # the cache folder
    # Log through the module logger like everything else, not the root logger
    logger.info(f"Using {userinput} as url")
    response = requests_get(userinput)
    data = response.json()
    if data.get("type") not in [
        "https://schema.oparl.org/1.0/Body",
        "https://schema.oparl.org/1.1/Body",
    ]:
        raise RuntimeError("The url you provided didn't point to an oparl body")
    endpoint_system = data["system"]
    endpoint_id = data["id"]
    if userinput != endpoint_id:
        logger.warning(
            f"The body's url '{userinput}' doesn't match the body's id '{endpoint_id}'"
        )
    return endpoint_system, endpoint_id
def get_loader_from_body(body_id: str) -> BaseLoader:
    """Return the loader for the system that the body ``body_id`` belongs to.

    The body is taken from ``CachedObject`` if present; otherwise it is
    fetched and stored there with ``to_import=False``.

    Assumptions:
     * The body->system link hasn't changed
     * The system might have, e.g. to a newer version where we don't need
       workarounds anymore, so the system itself is always looked up again

    :param body_id: The url of the body.
    :return: The result of ``get_loader_from_system`` for the body's system.
    """
    cached_body = CachedObject.objects.filter(url=body_id).first()
    if not cached_body:
        logger.info(f"Fetching the body {body_id}")
        data = requests_get(body_id).json()
        CachedObject.objects.create(
            url=data["id"],
            oparl_type=data["type"],
            data=data,
            to_import=False,
        )
        return get_loader_from_system(data["system"])

    logger.info(f"The body {body_id} is cached")
    return get_loader_from_system(cached_body.data["system"])
def get_with_retry_on_500(self, url: str) -> Response:
    """Custom retry logic with logging and backoff

    Requests ``url`` until it succeeds. An ``HTTPError`` with status 500 is
    retried after sleeping ``self.error_sleep_seconds`` seconds, until the
    ``self.max_retries``-th attempt has failed, whose error is re-raised.
    Any other ``HTTPError`` is re-raised immediately.

    :param url: The url to request.
    :return: The response of the first successful request.
    """
    attempt = 1
    while True:
        try:
            return requests_get(url)
        except HTTPError as e:
            # Only server errors are worth retrying
            if e.response.status_code != 500:
                raise
            if attempt == self.max_retries:
                logger.error(
                    f"Request failed {self.max_retries} times with an Error 500, aborting: {e}"
                )
                raise
            logger.error(
                f"Got an 500 for a Somacos request, retrying after sleeping {self.error_sleep_seconds}s: {e}"
            )
            time.sleep(self.error_sleep_seconds)
            attempt += 1
def load_file(self, url: str) -> Tuple[bytes, Optional[str]]:
    """Download ``url`` and return the content and the content type.

    :param url: The url of the file.
    :return: The response body and the value of the ``Content-Type``
        response header, which is ``None`` if the header is missing.
    """
    response = requests_get(url)
    return response.content, response.headers.get("Content-Type")