def get_url_from_mapper(filters):
    """
    Ask Gratka's URL mapper for the URL that matches the supplied filters.

    The filters are flattened into ``(name, value)`` pairs (a list value
    yields one pair per element, every value is converted with ``str``),
    encoded by hand as a ``multipart/form-data`` body and POSTed to
    ``http://www.gratka.pl/mapper/``. The ``redirectUrl`` field of the JSON
    reply is returned.

    No HTTP status check is performed: an error page surfaces as a JSON
    decoding error (a ``ValueError``) or a ``KeyError`` when ``redirectUrl``
    is absent.

    :param filters: mapping of form-field name to a single value or a list of
        values; see :meth:`gratka.category.get_category` for reference
    :return: the ``redirectUrl`` value from the mapper's JSON response
    """
    # Boundary token as announced in the content-type header; inside the body
    # every delimiter is this token prefixed with two extra dashes.
    boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

    form_fields = []
    for field_name, raw_value in filters.items():
        values = raw_value if isinstance(raw_value, list) else [raw_value]
        form_fields.extend((field_name, str(item)) for item in values)

    parts = [
        '--{0}\r\nContent-Disposition: form-data; name="{1}"\r\n\r\n{2}'.format(
            boundary, field_name, field_value
        )
        for field_name, field_value in form_fields
    ]
    # A multipart body must end with the close delimiter ("--boundary--");
    # without it the last part is unterminated and strict parsers may drop it.
    parts.append("--{0}--\r\n".format(boundary))
    payload = "\r\n".join(parts)

    url = "http://www.gratka.pl/mapper/"
    headers = {
        'content-type': "multipart/form-data; boundary={0}".format(boundary),
        'cache-control': "no-cache",
        'User-Agent': get_random_user_agent()
    }
    response = requests.request("POST", url, data=payload.encode("utf-8"), headers=headers)
    return json.loads(response.text)["redirectUrl"]
def get_url_for_filters(payload):
    """
    POST the given payload to ``SEARCH_URL`` and extract a URL from the returned HTML.

    The response body is parsed with BeautifulSoup's ``html.parser``; the
    result is the ``value`` attribute of a node located relative to the first
    ``<option>`` inside the element with class ``nice-select-tsi``.

    :param payload: Tuple of tuples containing POST key and argument
    :type payload: tuple
    :return: Url read from the search form in the response
    :rtype: str
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    search_response = requests.post(SEARCH_URL, payload, headers=request_headers)
    soup = BeautifulSoup(search_response.content, "html.parser")

    select_box = soup.find(class_="nice-select-tsi")
    first_option = select_box.find("option")
    # Step two siblings forward from the first option; presumably the first
    # hop is a whitespace text node between tags -- confirm against the markup.
    target_option = first_option.next_sibling.next_sibling
    return target_option.attrs["value"]
def get_content_for_url(url):
    """
    Fetch the given url with a GET request and return the response object.

    A random User-Agent header is attached to the request. This function
    itself performs no caching of the response.

    :param url: Website url
    :type url: str
    :return: Response for requested url
    :raises requests.HTTPError: if the response carries an HTTP error status
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    page = requests.get(url, headers=request_headers)
    # Surface 4xx/5xx replies as exceptions instead of returning them.
    page.raise_for_status()
    return page
def get_content_from_source(url):
    """
    Fetch the given url with a GET request and return the raw response body.

    A random User-Agent header is attached to the request. An HTTP error
    status is logged as a warning and reported to the caller as ``None``;
    other request failures are not caught here. This function itself performs
    no caching of the response.

    :param url: Website url
    :type url: str
    :return: Response content for the requested url, or None on an HTTP error status
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    reply = requests.get(url, headers=request_headers)
    try:
        reply.raise_for_status()
    except requests.HTTPError as error:
        log.warning('Request for {0} failed. Error: {1}'.format(url, error))
        return None
    else:
        return reply.content
def get_offer_phone_numbers(offer_id, cookie, csrf_token):
    """
    Request the poster's phone number(s) for an offer from the OtoDom contact endpoint.

    A form-encoded POST carrying the CSRF token is sent to
    ``https://www.otodom.pl/ajax/misc/contact/phone/<offer_id>/`` with the
    given cookie. A 404 reply yields an empty list; otherwise the ``value``
    field of the JSON reply is returned unchanged.

    :param offer_id: string, taken from context, see the return section of
        :meth:`scrape.category.get_category` for reference
    :param cookie: string, see :meth:`scrape.utils.get_cookie_from` for reference
    :param csrf_token: string, see :meth:`scrape.utils.get_csrf_token` for reference
    :rtype: list(string)
    :return: the ``value`` field of the response -- described upstream as a
        list of phone numbers as strings (no spaces, no '+48'); the format is
        not checked here
    """
    endpoint = "https://www.otodom.pl/ajax/misc/contact/phone/{0}/".format(offer_id)
    form_body = "CSRFToken={0}".format(csrf_token)
    request_headers = {
        'cookie': "{0}".format(cookie),
        'content-type': "application/x-www-form-urlencoded",
        'User-Agent': get_random_user_agent()
    }
    reply = requests.post(endpoint, data=form_body, headers=request_headers)

    # A missing offer/contact is reported as "no numbers" rather than an error.
    if reply.status_code == 404:
        return []

    parsed = json.loads(reply.text)
    return parsed["value"]
def get_response_for_url(url):
    """
    Perform a GET request for the given url with a random User-Agent header.

    The response is returned as-is; the HTTP status is not checked here.

    :param url: an url, most likely from the :meth:`gratka.utils.get_url` method
    :return: a requests.response object
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    reply = requests.get(url, headers=request_headers)
    return reply