Example #1
def get_rest_dets_for_rest(BASE_URL):  # given a base URL, print the rating details
    """ Parse restaurant page for rating details for each restaurant """
    # this step should have been done while scraping the reviews for each restaurant,
    # but the full documents were not stored and it would be too time-consuming to
    # rebuild the scraper into a single, more efficient pass
    print("visiting {}".format(BASE_URL))

    session = Session()
    #get first page reviews+page no
    session.head(
        BASE_URL)  #'https://www.burpple.com/mizzy-corner-nasi-lemak/reviews'
    response = session.get(url=BASE_URL)
    soup = BeautifulSoup(response.content, "html.parser")
    # get the number of pages (as a string) as well; e.g. page 8 is 70

    that_section = soup.find(
        "div", {"class": "ppr_priv_restaurants_detail_info_content"})
    price_section = that_section.find("span", ["ui_column", "is-6 price"]).text
    ratings_section = that_section.find("div", {
        "class": "questionRatings"
    }).findAll("span")
    for rat in ratings_section:
        print(rat['class'])
    print(price_section)
    print(ratings_section)
Example #2
def get_currently_available_dates():
    print("Getting currently available dates...")
    ua = UserAgent()
    headers = {'User-Agent': ua.random}

    session = Session()
    session.head(
        'https://www.britishcouncil.am/en/exam/ielts/dates-fees-locations', headers=headers)
    response = session.post(url='https://www.britishcouncil.am/en/views/ajax',
                            data={'field_pf_exam_examname_value': 1, 'view_name': 'product_finder_ielts_test_dates',
                                  'view_display_id': 'block_pf_ielts_test_dates', 'view_args': 'en'},
                            headers=headers)
    commands = response.json()
    insert_command = next(e for e in commands if e['command'] == 'insert')
    insert_data = insert_command['data']
    bs = BeautifulSoup(insert_data, features="html.parser")
    table = bs.select("table")[0]
    body = table.find_all('tbody')[0]
    results = []
    for row in body.children:
        if not hasattr(row, 'children'):
            continue
        row_data = {}
        row_children = [e for e in row.children if hasattr(e, 'select')]
        for i, td in enumerate(row_children):
            if i == 0 or i == 2:
                dt = td.select('span.date-display-single')[0].get_text()
                if i == 0:
                    row_data['test_date'] = dt
                if i == 2:
                    row_data['registration_deadline'] = dt
        results.append(row_data)
    print("Got these test date records: ", results)
    return results
Example #3
def get_domain_report(inward_array, var_array):
    for i in inward_array:
        if var_array[0] in i:
            params = str(i[var_array[0]]) + "?key="
            try:
                s = Session()
                s.head("https://api.zetascan.com/v2/check/json/" + params +
                       api_key)
                res = s.get("https://api.zetascan.com/v2/check/json/" +
                            params + api_key)
                json_response = res.json()
            except Exception as e:
                print('Api Request Error %s' % e)
                # skip this entry if the API call failed
                continue
            try:
                i['$ZSStatus'] = json_response['status']
            except Exception:
                pass
            for dt in json_response['results']:
                try:
                    i['$ZSFound'] = dt['found']
                except Exception:
                    pass
                try:
                    if dt['fromSubnet'] != []:
                        i['$ZSFromSubnet'] = dt['fromSubnet']
                except Exception:
                    pass
                try:
                    i['$ZSItem'] = dt['item']
                except Exception:
                    pass
                try:
                    if dt['lastModified'] != []:
                        c = datetime.datetime.utcfromtimestamp(
                            dt['lastModified']).strftime('%Y-%m-%d %H:%M:%S')
                        i['$ZSLastModified'] = c
                except Exception:
                    pass
                try:
                    i['$ZSScore'] = dt['score']
                except Exception:
                    pass
                try:
                    if dt['sources'] != []:
                        i['$ZSSources'] = dt['sources']
                except Exception:
                    pass
                try:
                    i['$ZSWebScore'] = dt['webscore']
                except Exception:
                    pass
                try:
                    i['$ZSWhiteList'] = dt['wl']
                except Exception:
                    pass
                try:
                    if dt['wldata'] != '':
                        i['$ZSWhiteListData'] = dt['wldata']
                except Exception:
                    pass
Example #4
def get_agent_html_by_phone_number(phone_number):
    logger.info("get agent by phone number = %s" % phone_number)
    try:
        session = Session()
        session.head(HEADER_URL)
        response = session.post(url=SEARCH_URL,
                                data={
                                    'type': 'searchSls',
                                    'slsName': '',
                                    'slsEaName': '',
                                    'slsRegNo': '',
                                    'slsMblNum': phone_number,
                                    'answer': ''
                                },
                                headers={
                                    'Referer': HEADER_URL,
                                })
        if "Invalid captcha" in response.text:
            raise ConnectionError
        return response.text
    except ConnectionError as e:
        logger.error("Connection Error with phonenumber = %s" % phone_number +
                     str(e))
        logger.info('Try to resume...')
        time.sleep(3)
        return get_agent_html_by_phone_number(phone_number)
Example #5
def verify_download(session: Session, files: List[str], output_dir: Path) -> List[str]:
    # create a data directory in the current working directory to store the downloaded files
    output_dir.mkdir(exist_ok=True)

    # should existing files be overwritten
    print("Checking for existing files")
    local_files = [file.name.lower() for file in output_dir.iterdir()]
    existing_files = list(filter(lambda file: Path(file).name.lower() in local_files, files))
    if existing_files and input(f"Overwrite {len(existing_files)} existing files? [y/n]: ") not in ('y', 'ye', 'yes'):
        files = list(filter(lambda file: file not in existing_files, files))

    print('found files: %s' %len(files))
    print('first file on list:')
    print(files[0])

    # calculate total file size so that the user can verify they have enough space
    print("Getting download size.")
    total_size = 0
    if not files:
        print("No files matched the filter or no files were found in the directory, exiting")
        exit()

    for iterNum, file in enumerate(files):
        _redirect = session.head(file)
        _response = session.head(_redirect.url)
        print('checking size for file: %d/%d' %(iterNum+1, len(files)))
        total_size += int(_response.headers.get('content-length'))

    # inform the user before starting download
    if input(f"Download {len(files)}, {total_size // 1024**2} MB? [y/n]: ").lower() not in ('y', 'ye', 'yes'):
        print("Exiting, consider adding more filters or starting at a lower level folder")
        exit()

    return files
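
The size check above issues one HEAD request per file and then a second HEAD against the response URL before summing Content-Length. A compact variant of that probe, assuming the server reports Content-Length (the helper name is mine, not from the example):

from requests import Session

def remote_size(session: Session, url: str) -> int:
    """Return the size in bytes reported by the server, or 0 if unknown."""
    # allow_redirects=True lets a single HEAD land on the final URL.
    response = session.head(url, allow_redirects=True)
    return int(response.headers.get('Content-Length', 0))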
Example #6
def r_next_page(url,plot):
	next_url = url
	data = {'scrollOffset':plot}
	session = Session()
	session.head('http://www.radionomy.com')
	response = session.post(
	url =url,
	data=data,
	headers=headers)
	plot = plot + 1
	match = re.compile('href="(.+?)" rel="internal"><img class="radioCover" src="(.+?)" alt="(.+?)" ').findall(str(response.text))
	for url,image,title in match:
		url = str(url).replace('/en/radio', 'http://listen.radionomy.com').replace('/index', '.m3u')
		h = HTMLParser.HTMLParser()
		try: title = h.unescape(title)
		except UnicodeDecodeError:
			continue
		image = image.replace('s67.jpg', 's400.jpg')
		try: add_directory3(title,url,140, defaultfanart ,image,plot='')
		except KeyError:
			continue
		xbmcplugin.setContent(pluginhandle, 'songs')
	add_directory2('Next Page>>', next_url, 133, defaultfanart, defaultimage, plot)
	xbmc.executebuiltin("Container.SetViewMode("+str(confluence_views[6])+")")
	xbmcplugin.endOfDirectory(addon_handle)
Example #7
class GoDaddyAPIs:
    __author__ = "Yuan Zhuang"
    __version__ = "1.0.0"
    __email__ = "*****@*****.**"
    
    def __init__(self, api_key, api_secret, user_id=""):
        self.api_key = api_key
        self.api_secret = api_secret
        self.authorization_string = "sso-key " + self.api_key + ":" + self.api_secret
        self.url_base = "https://api.godaddy.com"
        self.user_id = user_id
        self.session = Session()
        self.session.head(self.url_base)

    def get_info(self, url_get):
        response = self.session.get(
            url = url_get, 
            data = {
                },
            headers = {
                "Authorization": self.authorization_string,
                "Accept": "application/json"
                }
            )
        return response.json()
        
    def get_domains(self):
        # @return an array of domains under the api key
        url_get = self.url_base + "/v1/domains"
        return self.get_info(url_get)

    def get_domain_info(self, domain):
        # @return a json of the info of the domain
        url_get = "/".join([self.url_base, "v1/domains", domain])
        return self.get_info(url_get)

    def get_domain_record(self, domain, record = "A", subdomain=""):
        # @return a json of the info of the record
        url_get = "/".join([self.url_base, "v1/domains", domain, "records", record])
        if len(subdomain)>0:
            url_get = "/".join([url_get, subdomain])
        return self.get_info(url_get)

    def set_subdomain_ip(self, ip, domain, subdomain="www", record = "A",  ttl = 3600):
        # @return an empty JSON object on success
        url_put = "/".join([self.url_base, "v1/domains", domain, "records", record, subdomain])
        response = self.session.put(
            url = url_put,
            data = {
                "data": ip
                },
            headers = {
                "Authorization": self.authorization_string,
                "Accept": "application/json"
                }
            )
        return response.json()
Example #8
def property_sdat_render(identifier):

    idpath = str(identifier).split('/')
    
    if len(idpath) <= 0 or len(idpath) > 2:
        error = ('Invalid identifiers specified: %s, length %i not correct.  \nProper usage: sdat/homeAddress (spaces allowed (NOT IMPLEMENTED!)) or sdat/block/lot are the only acceptable formats.') % (','.join(idpath), len(idpath))
        return error
    elif len(idpath) == 1:
        homeAddress = idpath[0]
        block = ''
        lot = ''
    else:
        block = idpath[0]
        lot = idpath[1]
        homeAddress = ''
        
    # Format boilerplate request
    formdict = {'ctl00$ctl00$ctl00$ToolkitScriptManager1':'ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$updatePanel1|ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$StepNavigationTemplateContainerID$btnStepNextButton',
        'ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$hideBanner':'false',
        '__EVENTTARGET':'',
        '__EVENTARGUMENT':'',
        '__LASTFOCUS':'',
        '__VIEWSTATE':'/wEPDwUKLTI3NzgyMTg5Mw9kFgJmD2QWAmYPZBYCZg9kFgQCAQ9kFggCCQ8VAz0vUmVhbFByb3BlcnR5L2Vnb3YvZnJhbWV3b3Jrcy9ib290c3RyYXAvY3NzL2Jvb3RzdHJhcC5taW4uY3NzKC9SZWFsUHJvcGVydHkvZWdvdi9jc3MvYWdlbmN5LXN0eWxlcy5jc3M1L1JlYWxQcm9wZXJ0eS9lZ292L2Nzcy9tZGdvdl9yZXNwb25zaXZlVGFibGVzLm1pbi5jc3NkAgoPZBYCZg8VASgvUmVhbFByb3BlcnR5L2Vnb3YvY3NzL2FnZW5jeS1oZWFkZXIuY3NzZAILDxUKIS9SZWFsUHJvcGVydHkvZWdvdi9jc3MvaWUvaWU4LmNzcyEvUmVhbFByb3BlcnR5L2Vnb3YvY3NzL2llL2llNy5jc3MiL1JlYWxQcm9wZXJ0eS9lZ292L2pzL2h0bWw1c2hpdi5qcxsvUmVhbFByb3BlcnR5L2Nzcy9wcmludC5jc3MfL1JlYWxQcm9wZXJ0eS9zY3JpcHRzL2dsb2JhbC5qcykvUmVhbFByb3BlcnR5L2Vnb3YvanMvanF1ZXJ5LTEuOC4yLm1pbi5qcykvUmVhbFByb3BlcnR5L3NjcmlwdHMvanF1ZXJ5LmN5Y2xlLmFsbC5qcygvUmVhbFByb3BlcnR5L3NjcmlwdHMvanF1ZXJ5LnZhbGlkYXRlLmpzGi9SZWFsUHJvcGVydHkvSlMvZ2xvYmFsLmpzIC9SZWFsUHJvcGVydHkvZWdvdi9qcy95dWktbWluLmpzZAIMD2QWAgIBD2QWAmYPFQIaL1JlYWxQcm9wZXJ0eS9DU1MvTWFpbi5jc3MgL1JlYWxQcm9wZXJ0eS9jc3MvVGFibGVTdHlsZS5jc3NkAgMPZBYKAgEPZBYCAgEPFgIeBGhyZWYFHGh0dHA6Ly93d3cuZGF0Lm1hcnlsYW5kLmdvdi8WAgIBDxYEHgNzcmMFGX4vZWdvdi9pbWcvU0RBVF9USVRMRS5wbmceA2FsdAU1TWFyeWxhbmQgU3RhdGUgRGVwYXJ0bWVudCBvZiBBc3Nlc3NtZW50cyBhbmQgVGF4YXRpb25kAgMPZBYCAgEPZBYEAgMPZBYCAgEPDxYCHgRUZXh0BQJ3M2RkAgUPZBYCAgEPZBYCZg9kFgJmD2QWBgIFD2QWBAIBDw8WAh8DZWRkAgMPEGRkFgBkAgcPPCsADwEOaBYCZg9kFgICAQ9kFgJmD2QWAmYPZBYKZg9kFgICAQ9kFgJmD2QWAgIBD2QWBAIDDxAPFgYeDURhdGFUZXh0RmllbGQFBXZhbHVlHg5EYXRhVmFsdWVGaWVsZAUDa2V5HgtfIURhdGFCb3VuZGdkEBUZDC1TZWxlY3Qgb25lLQ9BTExFR0FOWSBDT1VOVFkTQU5ORSBBUlVOREVMIENPVU5UWQ5CQUxUSU1PUkUgQ0lUWRBCQUxUSU1PUkUgQ09VTlRZDkNBTFZFUlQgQ09VTlRZD0NBUk9MSU5FIENPVU5UWQ5DQVJST0xMIENPVU5UWQxDRUNJTCBDT1VOVFkOQ0hBUkxFUyBDT1VOVFkRRE9SQ0hFU1RFUiBDT1VOVFkQRlJFREVSSUNLIENPVU5UWQ5HQVJSRVRUIENPVU5UWQ5IQVJGT1JEIENPVU5UWQ1IT1dBUkQgQ09VTlRZC0tFTlQgQ09VTlRZEU1PTlRHT01FUlkgQ09VTlRZFlBSSU5DRSBHRU9SR0UnUyBDT1VOVFkTUVVFRU4gQU5ORSdTIENPVU5UWRFTVC4gTUFSWSdTIENPVU5UWQ9TT01FUlNFVCBDT1VOVFkNVEFMQk9UIENPVU5UWRFXQVNISU5HVE9OIENPVU5UWQ9XSUNPTUlDTyBDT1VOVFkQV09SQ0VTVEVSIENPVU5UWRUZAi0xAjAxAjAyAjAzAjA0AjA1AjA2AjA3AjA4AjA5AjEwAjExAjEyAjEzAjE0AjE1AjE2AjE3AjE4AjE5AjIwAjIxAjIyAjIzAjI0FCsDGWdnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2cWAQIDZAIJDxAPFgYfBAUFdmFsdWUfBQUDa2V5HwZnZBAVBQwtU2VsZWN0IG9uZS0OU1RSRUVUIEFERFJFU1MbUFJPUEVSVFkgQUNDT1VOVCBJREVOVElGSUVSCk1BUC9QQVJDRUwOUFJPUEVSVFkgU0FMRVMVBQItMQIwMQIwMgIwMwIwNBQrAwVnZ2dnZxYBAgNkAgEPZBYCAgEPZBYGAgEPDxYCHwMFJkVudGVyIE1hcCBSZWZlcmVuY2UgZm9yIEJBTFRJTU9SRSBDSVRZZGQCBw8WAh4HVmlzaWJsZWdkAg0PZBYSAgMPEGQPFg1mAgECAgIDAgQCBQIGAgcCCAIJAgoCCwIMFg0QBQVNb250aAUFTW9udGhnEAUDSmFuBQIwMWcQBQNGZWIFAjAyZxAFA01hcgUCMDNnEAUDQXByBQIwNGcQBQNNYXkFAjA1ZxAFA0p1bgUCMDZnEAUDSnVsBQIwN2cQBQNBdWcFAjA4ZxAFA1NlcAUCMDlnEAUDT2N0BQIxMGcQBQNOb3YFAjExZxAFA0RlYwUCMTJnFgECCWQCBQ8QZA8WIGYCAQICAgMCBAIFAgYCBwIIAgkCCgILAgwCDQIOAg8CEAIRAhICEwIUAhUCFgIXAhgCGQIaAhsCHAIdAh4CHxYgEAUDRGF5BQNEYXlnEAUCMDEFAjAxZxAFAjAyBQIwMmcQBQIwMwUCMDNnEAUCMDQFAjA0ZxAFAjA1BQIwNWcQBQIwNgUCMDZnEAUCMDcFAjA3ZxAFAjA4BQIwOGcQBQIwOQUCMDlnEAUCMTAFAjEwZxAFAjExBQIxMWcQBQIxMgUCMTJnEAUCMTMFAjEzZxAFAjE0BQIxNGcQBQIxNQUCMTVnEAUCMTYFAjE2ZxAFAjE3BQIxN2cQBQIxOAUCMThnEAUCMTkFAjE5ZxAFAjIwBQIyMGcQBQIyMQUCMjFnEAUCMjIFAjIyZxAFAjIzBQIyM2cQBQIyNAUCMjRnEAUCMjUFAjI1ZxAFAjI2BQIyNmcQBQIyNwUCMjdnEAUCMjgFAjI4ZxAFAjI5BQIyOWcQBQIzMAUCMzBnEAUCMzEFAjMxZxYBAg1kAgcPDxYCHwMFBDIwMTVkZAILDxBkDxYNZgIBAgICAwIEAgUCBgIHAggCCQIKAgsCDBYNEAUFTW9udGgFBU1vbnRoZxAFA0phbgUCMDFnEAUDRmViBQIwMmcQBQNNYXIFAjAzZxAFA0FwcgUCMDRnEAUDTWF5BQIwNWcQBQNKdW4FAjA2ZxAFA0p1bAUCMDdnEAUDQXVnBQIwOGcQBQNTZXAFAjA5ZxAFA09jdAUCMTBnEAUDTm92BQIxMWcQBQNEZWMFAjEyZxYBAglkAg0PEGQPFiBmAgECAgIDAgQCBQIGAgcCCAIJAgoCCwIMAg0CDgIPAhACEQISAhMCFAIVAhYCFwIYAhkCGgIbAhwC
HQIeAh8WIBAFA0RheQUDRGF5ZxAFAjAxBQIwMWcQBQIwMgUCMDJnEAUCMDMFAjAzZxAFAjA0BQIwNGcQBQIwNQUCMDVnEAUCMDYFAjA2ZxAFAjA3BQIwN2cQBQIwOAUCMDhnEAUCMDkFAjA5ZxAFAjEwBQIxMGcQBQIxMQUCMTFnEAUCMTIFAjEyZxAFAjEzBQIxM2cQBQIxNAUCMTRnEAUCMTUFAjE1ZxAFAjE2BQIxNmcQBQIxNwUCMTdnEAUCMTgFAjE4ZxAFAjE5BQIxOWcQBQIyMAUCMjBnEAUCMjEFAjIxZxAFAjIyBQIyMmcQBQIyMwUCMjNnEAUCMjQFAjI0ZxAFAjI1BQIyNWcQBQIyNgUCMjZnEAUCMjcFAjI3ZxAFAjI4BQIyOGcQBQIyOQUCMjlnEAUCMzAFAjMwZxAFAjMxBQIzMWcWAQINZAIPDw8WAh8DBQQyMDE1ZGQCEw8QZGQWAWZkAhcPEGRkFgJmAgFkAjQPEGRkFgRmAgECAgIDZAICD2QWAgIFD2QWBmYPZBYEAgEPDxYCHwNlZGQCAw8QZGQWAGQCAQ88KwARAgEQFgAWABYADBQrAABkAgIPPCsAEQIBEBYAFgAWAAwUKwAAZAIDD2QWAgIHD2QWBgIBD2QWBAIBDw8WAh8DZWRkAgMPEGRkFgBkAgMPPCsACQBkAgUPPCsACQBkAgQPZBYCAgUPZBYGZg9kFgQCAQ8PFgIfA2VkZAIDDxBkZBYAZAIIDzwrABECARAWABYAFgAMFCsAAGQCCg88KwARAgEQFgAWABYADBQrAABkAgkPZBYEAgEPDxYCHwdoZGQCAw8PFgIfB2dkZAIEDxYCHglpbm5lcmh0bWwFPDMwMSBXLiBQcmVzdG9uIFN0LiwgQmFsdGltb3JlLCBNRCAyMTIwMS0yMzk1OyAoNDEwKSA3NjctMTE4NGQCBQ8WAh8IBS9PdXRzaWRlIHRoZSBCYWx0aW1vcmUgTWV0cm8gQXJlYSAoODg4KSAyNDYtNTk0MWQCBg8WAh8IBR1NYXJ5bGFuZCBSZWxheSAoODAwKSA3MzUtMjI1OGQYBgV9Y3RsMDAkY3RsMDAkY3RsMDAkTWFpbkNvbnRlbnQkTWFpbkNvbnRlbnQkY3BoTWFpbkNvbnRlbnRBcmVhJHVjU2VhcmNoVHlwZSR3enJkUmVhbFByb3BlcnR5U2VhcmNoJHVjR3JvdW5kUmVudCRndl9HUlJlZGVtcHRpb24PZ2QFgQFjdGwwMCRjdGwwMCRjdGwwMCRNYWluQ29udGVudCRNYWluQ29udGVudCRjcGhNYWluQ29udGVudEFyZWEkdWNTZWFyY2hUeXBlJHd6cmRSZWFsUHJvcGVydHlTZWFyY2gkdWNTZWFyY2hSZXN1bHQkZ3ZfU2VhcmNoQnlSUFNhbGUPZ2QFhAFjdGwwMCRjdGwwMCRjdGwwMCRNYWluQ29udGVudCRNYWluQ29udGVudCRjcGhNYWluQ29udGVudEFyZWEkdWNTZWFyY2hUeXBlJHd6cmRSZWFsUHJvcGVydHlTZWFyY2gkdWNHcm91bmRSZW50JGd2X0dSUmVnaXN0cmF0b25SZXN1bHQPZ2QFYGN0bDAwJGN0bDAwJGN0bDAwJE1haW5Db250ZW50JE1haW5Db250ZW50JGNwaE1haW5Db250ZW50QXJlYSR1Y1NlYXJjaFR5cGUkd3pyZFJlYWxQcm9wZXJ0eVNlYXJjaA8QZBQrAQICAWYCAWQFcGN0bDAwJGN0bDAwJGN0bDAwJE1haW5Db250ZW50JE1haW5Db250ZW50JGNwaE1haW5Db250ZW50QXJlYSR1Y1NlYXJjaFR5cGUkd3pyZFJlYWxQcm9wZXJ0eVNlYXJjaCRXaXphcmRNdWx0aVZpZXcPD2QCAWQFf2N0bDAwJGN0bDAwJGN0bDAwJE1haW5Db250ZW50JE1haW5Db250ZW50JGNwaE1haW5Db250ZW50QXJlYSR1Y1NlYXJjaFR5cGUkd3pyZFJlYWxQcm9wZXJ0eVNlYXJjaCR1Y1NlYXJjaFJlc3VsdCRndl9TZWFyY2hSZXN1bHQPZ2QiKe/maufK7R523/kR+li5nB83xw==',
        '__VIEWSTATEGENERATOR':'67B65B95',
        '__EVENTVALIDATION':'/wEdAAeLHO4UiazhjJoEWt6WhsuhTM7lR6wkAQA4/LrX3F8+kJFCX6GXNshSNYudQwBINupfx21Q/fwirSrEZb/6IfokQ2/ExUnKIexbtLnm+FwTUQqLyXAQIRgTGoe+xG2l1j+MyhVtrGKhaCDPE00ZGRUyHazg9c4YiKfaisSIpUPyWMst3r0=',
        '__ASYNCPOST':'true',
        'ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$StepNavigationTemplateContainerID$btnStepNextButton':'Next',
        }
    
    # Site requires only specific fields are filled, e.g., if block and lot are available, clear the Address field
    if block != '' and lot != '':
        formdict['ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$ucEnterData$txtMap_Block'] = block
        formdict['ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$ucEnterData$txtMap_Lot'] = lot
        
    ## NOT FUNCTIONAL!
    elif homeAddress != '':
        formdict['ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$ucEnterData$txtStreenNumber'] = homeAddress.split('%20')[0]
        formdict['ctl00$ctl00$ctl00$MainContent$MainContent$cphMainContentArea$ucSearchType$wzrdRealPropertySearch$ucEnterData$txtStreetName'] = ' '.join(homeAddress.split('%20')[1:])
        
        
    session = Session()

    # HEAD requests ask for *just* the headers, which is all you need to grab the
    # session cookie
    session.head('http://sdat.dat.maryland.gov/RealProperty/Pages')

    response = session.post('http://sdat.dat.maryland.gov/RealProperty/Pages/default.aspx',
        data = formdict
    )
        
    # Output rendered HTML page to file in current directory
    #with open('propinfo.html','wb') as f:
    #	f.write(response.text.encode('utf-8'))	
            
    return response.text.encode('utf-8')
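
The comment above points out that a HEAD request returns only headers, which is enough for the server to set its session cookie before the form POST. A minimal, self-contained sketch of that pattern (the URLs and form fields here are placeholders, not taken from the example):

from requests import Session

def post_with_primed_cookies(page_url: str, post_url: str, form_data: dict):
    session = Session()
    # HEAD fetches only the response headers; any Set-Cookie header is
    # stored in the session's cookie jar.
    session.head(page_url)
    # The stored cookies are sent automatically with the follow-up POST.
    return session.post(post_url, data=form_data)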
Example #9
def generate_session(user):
    """Create session with pharmacists.ab.ca"""
    url = "https://pharmacists.ab.ca"

    try:
        session = Session()
        session.head(url, headers={"user-agent": user})
    except Exception as e:
        log.exception(e)
        session = None

    return session
Example #10
def usedata(url):
    session = Session()
    session.head('https://www.supremenewyork.com/shop/')
    html = download(url)
    poststuff = getdata(html)
    response = session.post(url='https://www.supremenewyork.com/' +
                            poststuff['action'],
                            data=poststuff['data'],
                            headers={'Referer': url})
    print(response.text)
    check = session.get('https://www.supremenewyork.com/shop/')
    tree = lxml.html.fromstring(check.text)
    print(tree.cssselect('div#cart')[0].attrib)
Example #11
def creatTopic(data):
    ua = UserAgent()
    user_agent = {
        'User-agent': ua.random,
        'Referer': 'https://dealbub.com/',
        'Content-type': 'content_type_value'
    }
    session = Session()
    session.head('https://dealbub.com/')
    data = topicContect()
    response = session.post(url='https://alisdeals.com/posts/',
                            headers=user_agent,
                            data=data)
Example #12
def get_session(settings):
    session = Session()
    auth = b64encode(CREDENTIALS.encode()).decode()
    session.headers.update({
        'Authorization': 'Basic {0}'.format(auth),
        'Content-Type': 'text/plain',
        'Accept': '*/*'
    })
    base_url = compose_url(settings, '')
    logging.info('Using base server URL {0}'.format(base_url))
    session.head(base_url)

    # Parse form token: it is the same for all pages in the session (e.g. XAR import, MAIL setting, etc.)
    get_form_token(settings, session)

    return session
Example #13
    def get_enabled_methods(self):

        s = Session()
        headers = {}
        headers['User-agent'] = self.user_agent

        salida = []
        s.mount(self.sitio, HTTPAdapter(max_retries=2))

        if s.put(self.sitio, headers=headers).status_code == 200:
            salida.append("put")
        if s.get(self.sitio, headers=headers).status_code == 200:
            salida.append("get")
        if s.options(self.sitio, headers=headers).status_code == 200:
            salida.append("options")
        if s.post(self.sitio, headers=headers).status_code == 200:
            salida.append("post")
        if s.delete(self.sitio, headers=headers).status_code == 200:
            salida.append("delete")
        if s.head(self.sitio, headers=headers).status_code == 200:
            salida.append("head")
        if s.patch(self.sitio, headers=headers).status_code == 200:
            salida.append("patch")

        return salida
Example #14
    def passive_search_order_headers(self):
        """
            Metodo que realiza una peticion al sitio e intenta inferir el
            servidor por el orden en que aparecen los encabezados de la respuesta
            del servidor. Util cuando la cabecera 'Server' ha sido omitida en la respuesta
        """

        with open(self.srv_dir, "r") as sjson:
            srv_json = loads(sjson.read())

        s = Session()
        headers = {}
        headers['User-agent'] = self.user_agent
        s.mount(self.site, HTTPAdapter(max_retries=2))

        response = s.head(self.site, headers=headers)

        probably_server = self.infer_server(srv_json,
                                            [x for x in response.headers])

        if probably_server:
            self.server = probably_server

        print("se infirio el servidor")
        print(self.server)
Example #15
 def __init__(self, session: requests.Session, url: str) -> None:
     self._session = session
     self._offset = 0
     with session.head(url, headers=self._HEADERS,
                       allow_redirects=True) as resp:
         if resp.status_code == 404:
             raise FileNotFoundError(errno.ENOENT,
                                     os.strerror(errno.ENOENT), url)
         elif resp.status_code in {401, 403}:
             raise PermissionError(errno.EACCES, os.strerror(errno.EACCES),
                                   url)
         resp.raise_for_status()
         if resp.headers.get('Accept-Ranges', 'none') != 'bytes':
             raise OSError(None, 'Server does not accept byte ranges', url)
         if 'Content-Encoding' in resp.headers:
             raise OSError(None, 'Server provided Content-Encoding header',
                           url)
         try:
             self._length = int(resp.headers['Content-Length'])
         except (KeyError, ValueError):
             raise OSError(None,
                           'Server did not provide Content-Length header',
                           url) from None
         # TODO: consider storing ETag/Last-Modified to check for data
         # changing under us.
         self._url = resp.url
         self.content_type = resp.headers.get('Content-Type')
Example #16
def get_keywords(id_list, filter_list, formatted=False):
    '''
    get_keywords:
        (1) Takes a list of Entrez Gene IDs associated with proteins in Biogrid
            (https://wiki.thebiogrid.org/doku.php/biogrid_tab_version_2.0)
        (2) Queries the NCBI Entrez site at the Gene endpoint with that list of IDs
            (https://www.ncbi.nlm.nih.gov/books/NBK25500/#_chapter1_Downloading_Document_Summaries_)
        (3) Retrieves the summaries for each gene by parsing the returned XML
        (4) Uses the gensim NLP library to find keywords in each summary
            (https://radimrehurek.com/gensim/summarization/keywords.html)
        (5) Adds up the score for each keyword across all summaries (i.e. if a keyword appears
            multiple times, its scores are summed)
        (6) And returns a list of those keywords, sorted by their cumulative scores.

        Theoretically, we can use this front-end to quickly figure out similarities between proteins in clusters.

    Arguments:
        (1) id_list (str): Concatenated list of IDs, with each ID separated by a comma ','
        (2) filter_list (list): Keywords to exclude from the result
        (3) formatted (bool): Indicates whether we want to json-format the result
    '''
    filter_list = [str(filt) for filt in list(filter_list)]    
    session = Session()
    session.head(HEAD)
    response = session.get(url = (URL + id_list))
    words = {}
    root = ElementTree.fromstring(response.text)
    for docsummary in root.find('DocumentSummarySet').findall('DocumentSummary'):
        summary = docsummary.find('Summary').text        
        try: 
            newwords = keywords(str(summary), scores=True, lemmatize=True, deacc=True) 
            for word, score in newwords:
                if word in filter_list:
                    continue
                if not word in words.keys():
                    words[word] = 0
                words[word] += float(score)
        except IndexError:
            continue

    def score(word):
        return 0 - words[word]

    result = sorted(list(words), key=score)
    return json.dumps(result) if formatted else result 
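
Steps (5) and (6) of the docstring describe the score aggregation; isolated from the Entrez parsing, that step looks roughly like this (the function name and type hints are mine):

from collections import defaultdict
from typing import Dict, Iterable, List, Tuple

def accumulate_keyword_scores(batches: Iterable[List[Tuple[str, float]]],
                              filter_list: List[str]) -> List[str]:
    """Sum scores per keyword across all summaries, highest total first."""
    totals: Dict[str, float] = defaultdict(float)
    for batch in batches:
        for word, score in batch:
            if word in filter_list:
                continue
            totals[word] += float(score)
    return sorted(totals, key=totals.get, reverse=True)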
Example #17
def get_tips_from_tipster(urlname):
    session = Session()
    SOURCE_SITE_URL = 'https://' + urlname + '.blogabet.com/blog/dashboard'
    session.head(SOURCE_SITE_URL)
    response = session.get(
        SOURCE_SITE_URL,
        headers={
            'Referer': 'https://' + urlname + '.blogabet.com/',
            'Accept-Encoding': 'gzip, deflate, br',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
            'Cookie':
            '_ga=GA1.2.1871104935.1529094724; __gads=ID=6ec2f53ceaeb39ff:T=1529137002:S=ALNI_Mao_o9pHbKzQ9jPdq8_B3kdocSMDQ; cookiesDirective=1; _gid=GA1.2.1161230770.1530264484; login_string=37c1a601e9e336eca1d3c7244ed256a631e7c8f0f90a806600c2e3e007764154a56190e191d70bdf3be4ea55df7757ea1838ad2ad836a76a26bc843fcd1b5904; remember_me=1; __atuvc=1%7C26; __atuvs=5b35ff9712464bd9000',
            'X-Compress': 'null',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        })
    tree = html.fromstring(response.text)
    return tree.xpath(".//div[@id='_blogPostsContent']/ul/ul/li")
Example #18
def cook_soup(url: str,
              session: r.Session) -> Tuple[Any, Tuple[str, timedelta, int]]:
    """Returns parsed HTML content of web page on provided <url> as <BeautifulSoup> object
    and response statistics of <url> visited.

    Arguments:
        url {str} -- url to page to parse
        session {r.Session} -- requests.Session() object

    Returns:
        Tuple[Any, Tuple[str, timedelta, int]] -- parsed content of the page as BeautifulSoup() object
        and response statistics of <url> visited
    """
    def dummy(status_code: int) -> Tuple[r.Response, Any]:
        response = r.Response()
        response.elapsed = timedelta(seconds=0)
        response.status_code = status_code
        soup = BeautifulSoup("<html></html>", "lxml")
        return (response, soup)

    def color_print(url: str, response_: r.Response) -> None:
        print("URL: '{}' :: it took: '{}' :: response status: '{}'".format(
            color_blue(url),
            color_response_time(response_.elapsed),
            color_response_status(str(response_.status_code)),
        ))

    headers = session.head(url).headers
    dummy_ = None
    response = r.Response()
    soup = BeautifulSoup("", "lxml")

    try:
        if ("text" in headers["content-type"].lower()) or (
                "html" in headers["content-type"].lower()):
            try:
                response = session.get(url, timeout=(60, 120))
                soup = BeautifulSoup(response.text, "lxml")
            except Exception as e:
                print(f"Func 'cook_soup': Exception encountered: {str(e)}")
                dummy_ = dummy(400)
        else:
            dummy_ = dummy(418)
    except KeyError as e:
        print(f"Func 'cook_soup': Exception encountered: {str(e)}")
        dummy_ = dummy(400)

    sleep(1)

    if dummy_:
        color_print(url, dummy_[0])
        return (dummy_[1], (url, dummy_[0].elapsed, dummy_[0].status_code))

    color_print(url, response)
    return (soup, (url, response.elapsed, response.status_code))
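
cook_soup uses an initial HEAD request to inspect the content-type before committing to a full GET. That gate, condensed into a standalone helper (the name is mine):

import requests

def is_probably_html(session: requests.Session, url: str) -> bool:
    """HEAD the URL and report whether its content-type looks like text/HTML."""
    content_type = session.head(url).headers.get("content-type", "").lower()
    return "text" in content_type or "html" in content_type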
Example #19
def download(s: Session, homework: dict, directory: Path):
    data = '[{"name":"sEcho","value":1},{"name":"iColumns","value":12},{"name":"sColumns","value":",,,,,,,,,,,"},'\
           '{"name":"iDisplayStart","value":0},{"name":"iDisplayLength","value":"-1"},{"name":"mDataProp_0",'\
           '"value":"function"},{"name":"bSortable_0","value":false},{"name":"mDataProp_1","value":"qzmc"},'\
           '{"name":"bSortable_1","value":true},{"name":"mDataProp_2","value":"xh"},{"name":"bSortable_2",'\
           '"value":true},{"name":"mDataProp_3","value":"xm"},{"name":"bSortable_3","value":true},'\
           '{"name":"mDataProp_4","value":"dwmc"},{"name":"bSortable_4","value":false},'\
           '{"name":"mDataProp_5","value":"bm"},{"name":"bSortable_5","value":false},'\
           '{"name":"mDataProp_6","value":"xzsj"},{"name":"bSortable_6","value":true},'\
           '{"name":"mDataProp_7","value":"scsjStr"},{"name":"bSortable_7","value":false},'\
           '{"name":"mDataProp_8","value":"pyzt"},{"name":"bSortable_8","value":true},'\
           '{"name":"mDataProp_9","value":"cj"},{"name":"bSortable_9","value":true},'\
           '{"name":"mDataProp_10","value":"jsm"},{"name":"bSortable_10","value":true},'\
           '{"name":"mDataProp_11","value":"function"},{"name":"bSortable_11","value":false},'\
           '{"name":"iSortCol_0","value":2},{"name":"sSortDir_0","value":"asc"},{"name":"iSortingCols","value":1},'\
           '{"name":"zyid","value":"%s"},{"name":"wlkcid","value":"%s"}]' % (
               homework['zyid'], homework['wlkcid'])
    url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/getDoneInfo'
    students = s.post(url, data={'aoData': data}).json()['object']['aaData']
    directory = directory / homework['bt']
    directory.mkdir(parents=True, exist_ok=True)
    for student in tqdm(students):
        base_url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/downloadFile'
        if not student['zyfjid']:
            continue
        url = f'{base_url}/{homework["wlkcid"]}/{student["zyfjid"]}'
        headers = s.head(url).headers
        raw_filename = re.search('filename="(.*?)"',
                                 headers['Content-Disposition']).group(1)
        suffix = Path(raw_filename).suffix
        filename = f'{student["xh"]}-{student["xm"]}' + suffix
        path = directory / filename
        size = int(headers['Content-Length'])
        if path.is_file() and path.stat().st_size == size:
            sleep(0.01)
            continue
        response = s.get(url, stream=True)
        assert response.status_code == 200
        with open(path, 'wb') as file:
            for chunk in tqdm(response.iter_content(32768)):
                if chunk:
                    file.write(chunk)
        if path.suffix in ['.rar', '.zip', '.7z']:
            arch = pyunpack.Archive(path)
            extract_to = directory / path.stem
            extract_to.mkdir(exist_ok=True)
            try:
                arch.extractall(directory=extract_to)
            except Exception as e:
                print(type(e).mro()[0], *e.args)
                print('Failed to extract', path)
        else:
            print('Unrecognized archive file', path)
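
Before streaming each file, the loop above reads the attachment name from Content-Disposition and the size from Content-Length via a HEAD request. The same probe as a standalone helper (the name is mine):

import re
from typing import Optional, Tuple
from requests import Session

def probe_attachment(session: Session, url: str) -> Tuple[Optional[str], int]:
    """Return (filename from Content-Disposition, size in bytes) for a URL."""
    headers = session.head(url).headers
    match = re.search(r'filename="(.*?)"', headers.get('Content-Disposition', ''))
    filename = match.group(1) if match else None
    return filename, int(headers.get('Content-Length', 0))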
Example #20
    def get_plugins(self):
        """
            Metodo para buscar plugins habilitados en el CMS.
            Obtiene los directorios donde se encuentran los plugins del
            JSON de configuracion asi como la ruta al archivo de los plugins
            de tal CMS.
        """
        with open(self.cms_dir, "r") as cmsJSON:
            cms_json = loads(cmsJSON.read())

        plugins_dir = cms_json[self.cms]['plugins_dir']
        plugins_txt = cms_json[self.cms]['plugins']

        if self.root[-1] != '/':
            plugins_dir = self.root + '/' + plugins_dir
        else:
            plugins_dir = self.root + plugins_dir

        # List where installed plugins are collected
        installed_plugins = []
        # Counter of plugins probed so far
        cont = 0
        try:
            with open(plugins_txt, "r") as plugins_file:
                # Iterate over each plugin in the file and probe for it
                for plugin in plugins_file:
                    cont += 1
                    # strip the trailing newline
                    plugin = plugin[:-1]
                    url2plugin = plugins_dir + plugin
                    print("Searching for plugin " +
                          url2plugin) if self.verbose else None
                    s = Session()
                    s.mount(url2plugin, HTTPAdapter(max_retries=2))
                    headers = {}
                    headers['User-agent'] = self.user_agent
                    response = s.head(url2plugin, headers=headers)

                    if (response.status_code == 200
                            or response.status_code == 403):
                        print("\tPlugin " + plugin +
                              " found!") if self.verbose else None
                        installed_plugins.append(plugin)

                    # Safety stop to avoid probing an enormous plugin list
                    if cont == self.count_plugins:
                        break

                self.installed_plugins = installed_plugins

        except IOError:
            print("error al abrir el archivo" + plugins_txt)
Example #21
def check_url(match_tuple: MatchTuple,
              http_session: requests.Session) -> Tuple[bool, str]:
    """Check if a URL is reachable."""
    try:
        result = http_session.head(match_tuple.link,
                                   timeout=5,
                                   allow_redirects=True)
        return (
            result.ok or result.status_code in OK_STATUS_CODES,
            f'status code = {result.status_code}',
        )
    except (requests.ConnectionError, requests.Timeout):
        return False, 'connection error'
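
check_url depends on a MatchTuple and an OK_STATUS_CODES set defined elsewhere in its module; stripped down to just the HEAD probe, a self-contained variant reads:

import requests
from typing import Tuple

def url_reachable(session: requests.Session, url: str) -> Tuple[bool, str]:
    """Probe a URL with a HEAD request and report reachability."""
    try:
        result = session.head(url, timeout=5, allow_redirects=True)
        return result.ok, f'status code = {result.status_code}'
    except (requests.ConnectionError, requests.Timeout):
        return False, 'connection error'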
Example #22
class CRequests:

    def __init__(self, CONNECT_TIMEOUT=CONNECT_TIMEOUT, READ_TIMEOUT=READ_TIMEOUT, stream=False):
        self.requests = Session()
        self.requests.stream = stream
        self.requests.trust_env = False
        self.requests.mount('http://', adapters.HTTPAdapter(pool_connections=NUM_POOLS,
                                                            pool_maxsize=POOL_MAXSIZE))
        self.requests.mount('https://', adapters.HTTPAdapter(pool_connections=NUM_POOLS,
                                                             pool_maxsize=POOL_MAXSIZE))

        self.tuple = (CONNECT_TIMEOUT, READ_TIMEOUT)

    def request(self, method, url, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.request(method, url, **kwargs)

    def get(self, url, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.get(url, **kwargs)

    def options(self, url, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.options(url, **kwargs)

    def head(self, url, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.head(url, **kwargs)

    def post(self, url, data=None, json=None, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.post(url, data=data, json=json, **kwargs)

    def put(self, url, data=None, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.put(url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.patch(url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        kwargs.setdefault('timeout', self.tuple)
        return self.requests.delete(url, **kwargs)

    def close(self):
        self.requests.close()
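
CRequests assumes "from requests import Session, adapters" plus module-level constants (CONNECT_TIMEOUT, READ_TIMEOUT, NUM_POOLS, POOL_MAXSIZE) that are not shown here. With those defined, it is a drop-in replacement for a plain Session; the URL below is a placeholder:

client = CRequests(CONNECT_TIMEOUT=5, READ_TIMEOUT=30)
response = client.head('https://example.com')  # every call gets the default timeout
print(response.status_code, response.elapsed)
client.close()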
Example #23
 def __init__(self,
              session: Session,
              url: str,
              chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
     response = session.head(url)
     response.raise_for_status()
     assert response.status_code == 200
     headers = response.headers
     self.session, self.url, self.chunk_size = session, url, chunk_size
     self.length = int(headers['Content-Length'])
     self.file = NamedTemporaryFile()
     self.truncate(self.length)
     self.left: List[int] = []
     self.right: List[int] = []
     self.accept_ranges = 'bytes' in headers.get('Accept-Ranges', 'none')
     with self.stay():
         self.__post_init__()
Example #24
def retrieve_img(unsplash_id: str, dl_path: Path,
                 ss: requests.Session) -> None:
    # sanity checks
    if not unsplash_id:
        raise ValueError(f"unsplash_id is empty.")
    if dl_path.is_file():
        raise OSError(f"{dl_path.absolute()} already exists.")

    r = ss.head(f"https://source.unsplash.com/{unsplash_id}/3000x1854")
    if r.ok:
        true_url = r.headers.get("Location")
        if true_url:
            img = ss.get(true_url)
            if img.ok:
                print(f"-- Downloading {true_url} to {dl_path}")
                with open(dl_path, "wb") as f:
                    f.write(img.content)
Example #25
    def check_url(self, session: requests.Session) -> Optional[bool]:
        """
        Check if URL is valid, return result and save it.

        Result is `None` if URL is undefined, `True` if URL is correct
        (HEAD request returns 200), `False` otherwise.
        """
        if self.url is not None:
            url_ok: bool = True
            try:
                r1 = session.head(self.url)
            except requests.exceptions.RequestException:
                url_ok = False
            else:
                if r1.status_code != 200:
                    url_ok = False
            self.url_ok = url_ok
        else:
            self.url_ok = None
        return self.url_ok
Example #26
class HTTPStorage(Storage):
    # class-level default so the "if not self._session" check below works
    # even before a session has been created
    _session = None

    def __init__(self, base_url=None):
        self._base_url = base_url

        if not self._session:
            self._session = Session()

    def _value_or_setting(self, value, setting):
        return setting if value is None else value

    @cached_property
    def base_url(self):
        if self._base_url is not None and not self._base_url.endswith('/'):
            self._base_url += '/'
        return self._value_or_setting(self._base_url, settings.MEDIA_URL)

    def _url(self, name):
        url = urljoin(self.base_url, name.lstrip("/"))
        assert (url.startswith(self.base_url))
        return url

    def url(self, name):
        return self._url(name)

    def delete(self, name):
        self._session.delete(self._url(name))

    def exists(self, name):
        r = self._session.head(self._url(name))
        if r.status_code >= 200 and r.status_code < 300:
            return True
        if r.status_code == 404:
            return False
        r.raise_for_status()

    def _save(self, name, content):
        self._session.put(self._url(name), data=content)
        return name

    def _open(self, name, mode='rb'):
        raise NotImplementedError()  # TODO
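
exists() maps HEAD status codes to a boolean: any 2xx counts as present, 404 as absent, anything else raises. Pulled out of the storage class (the function name is mine), the same mapping reads:

from requests import Session

def resource_exists(session: Session, url: str) -> bool:
    """True for a 2xx response, False for 404, raise for other errors."""
    response = session.head(url)
    if 200 <= response.status_code < 300:
        return True
    if response.status_code == 404:
        return False
    response.raise_for_status()
    # A non-error status outside the cases above (e.g. an unfollowed 3xx)
    # falls through as "not found".
    return False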
Example #27
    def make_requests(self, full_url, files2search):
        """
            Metodo que realiza una peticion HEAD al recurso dado como
            parametro (full_url), si se obtiene un codigo 200 o 403 asumimos
            que tal recurso existe.

            Retorna cuantos recursos existen de la lista dada como segundo
            argumento (files2search).
        """
        cont = 0
        try:
            for fl in files2search:
                url_file = ""
                # Concatenate the URL and the resource to look for,
                # adding the / if needed

                if full_url[-1] != '/':
                    url_file = full_url + '/' + fl
                else:
                    url_file = full_url + fl

                s = Session()
                headers = {}
                headers['User-agent'] = self.user_agent
                s.mount(url_file, HTTPAdapter(max_retries=2))

                response = s.head(url_file, headers=headers)

                if (response.status_code == 200
                        or response.status_code == 403):
                    message = '\t%s : File found    (CODE:%d)' % (
                        url_file, response.status_code)
                    print(message) if self.verbose else None
                    cont += 1

        except ConnectionError:
            pass

        finally:
            return cont
Example #28
    def get_themes(self):
        if self.cms == "Joomla!":
            return None

        with open(self.cms_dir, "r") as cmsJSON:
            cms_json = loads(cmsJSON.read())

        themes_dir = list(cms_json[self.cms]['themes_dir'].values())
        themes_txt = cms_json[self.cms]['themes']

        themes_l = []
        for directory in themes_dir:
            cont = 0
            if self.root[-1] != '/':
                directory = self.root + '/' + directory
            else:
                directory = self.root + directory

            with open(themes_txt, "r") as themes_list:
                for theme in themes_list:
                    cont += 1
                    theme = theme[:-1]
                    url2theme = directory + theme
                    print("Buscando  " + url2theme) if self.verbose else None
                    s = Session()
                    s.mount(url2theme, HTTPAdapter(max_retries=2))
                    headers = {}
                    headers['User-agent'] = self.user_agent
                    response = s.head(url2theme, headers=headers)
                    # If we get a 200 or 403 response, the resource exists
                    if (response.status_code == 200
                            or response.status_code == 403):
                        print("\tTheme " + theme +
                              " found!") if self.verbose else None
                        themes_l.append(theme)
                    if cont >= self.count_themes:
                        break

        self.installed_themes = themes_l
Example #29
class HttpChecker(BaseChecker):

    default_schema = "http://"
    default_schema_re = re.compile("^[hH][tT][tT][pP].*")

    def __init__(self, target: str, port: int):
        super(HttpChecker, self).__init__(target)
        self.port = port
        self.session = Session()

    def check(self) -> Response:

        url = f"{self.target}:{self.port}"
        if not self.default_schema_re.match(url):
            url = f"{self.default_schema}{url}"

        start_time = time.time()
        try:
            request = self.session.head(
                url,
                allow_redirects=True,
            )
        # TODO: requests.exceptions.InvalidURL failed to parse exception
        except ConnectionError:
            return Response(status=ResponseStatus.ERROR,
                            payload=ErrorPayload(
                                message="Failed to establish a new connection",
                                code=ErrorCodes.ConnectError,
                            ),
                            node=self.node_info)

        end_time = time.time()

        return Response(status=ResponseStatus.OK,
                        payload=HttpCheckerResponse(
                            time=end_time - start_time,
                            status_code=request.status_code),
                        node=self.node_info)
Example #30
    def passive_search_server_header(self):
        """
            Metodo que realiza una peticion al sitio y busca en las cabeceras
            la cabecera 'Server' para identificar el servidor y su version en caso
            de ser posible
        """
        ### patron = (servidor)(/(ve.rs.ion) (otros datos))
        ### grupo1 = servidor
        ### grupo3 = version or None
        ### grupo4 == otros datos or None
        info_pattern = "([\w-]*)(/(\d{1,2}.?\d{0,2}.?\d{0,2}) ?(.*)?)?"

        s = Session()
        headers = {}
        headers['User-agent'] = self.user_agent
        s.mount(self.site, HTTPAdapter(max_retries=2))

        response = s.head(self.site, headers=headers)

        # If the 'Server' header is missing, return False so that another
        # detection method can be used
        if not response.headers.get('Server'):
            return False

        pattern = search(info_pattern, response.headers['Server'])

        self.server = pattern.group(1)
        self.version = pattern.group(3)
        self.other_data = pattern.group(4)

        print("##############") if self.verbose else None
        print(self.site) if self.verbose else None
        print(self.server) if self.verbose else None
        print(self.version) if self.verbose else None
        print(self.other_data) if self.verbose else None

        return True
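
The capture groups described in the comments can be checked quickly against a typical Server banner; the sample value below is illustrative, not taken from a real scan:

from re import search

info_pattern = r"([\w-]*)(/(\d{1,2}.?\d{0,2}.?\d{0,2}) ?(.*)?)?"
banner = search(info_pattern, "Apache/2.4.41 (Ubuntu)")
print(banner.group(1))  # 'Apache'
print(banner.group(3))  # '2.4.41'
print(banner.group(4))  # '(Ubuntu)'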
Example #31
    # create a dictionary of the review to return
    review_dict = {"date": review_date, "rating": review_stars, "text": review_text}

    return review_dict


if __name__ == "__main__":
    # appId = 'com.marriott.mrt'
    if not len(argv) - 1:
        stderr.write("Usage: \n\t%s android-app-id\n" % argv[0])
        exit(1)

    appId = argv[1]
    session = Session()
    session.head("https://play.google.com/store/apps/details?id=%s" % appId)

    i = 0
    all_reviews = []
    dup_counts = 0

    while True:
        reviews = download_reviews(i)
        if not len(reviews):
            stderr.write("No more accessible reviews\n")
            break

        old_count = len(set([x["text"] + x["rating"] + x["text"] for x in all_reviews]))
        all_reviews += reviews
        new_count = len(set([x["text"] + x["rating"] + x["text"] for x in all_reviews]))
        if new_count == old_count:
Example #32
def property_info_render(identifier):

    idpath = str(identifier).split('/')
    
    if len(idpath) <= 0 or len(idpath) > 2:
        error = ('Invalid identifiers specified: length %i not correct.  \nProper usage: tax/homeAddress (spaces allowed) or tax/block/lot are the only acceptable formats.' % len(idpath))
        return error
    elif len(idpath) == 1:
        homeAddress = idpath[0]
        block = ''
        lot = ''
    else:
        block = idpath[0]
        lot = idpath[1]
        homeAddress = ''
    
    # Format boilerplate request
    formdict = {}
    
    formdict['__EVENTTARGET'] = 'ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$DataGrid1$ctl02$lnkBtnSelect'
    formdict['__VIEWSTATE'] = '/wEPDwUKMTQ5MjEyMzA1OA8WAh4FWWVhcnMVBAQyMDE2BDIwMTUEMjAxNAQyMDEzFgJmD2QWAmYPZBYEZg9kFgQCAg8WAh4EVGV4dGVkAgUPFgIeB1Zpc2libGVnFgJmDxYCHwFlZAIBD2QWCgIBDw8WAh4ISW1hZ2VVcmwFVmh0dHA6Ly9jaXR5c2VydmljZXMuYmFsdGltb3JlY2l0eS5nb3YvcmVtb3RlbWFzdGVydjMvaW1hZ2VzL2ludGVybmV0L2ljb25zL2xvYWRpbmcuZ2lmZGQCBA8WAh8CZ2QCBg8WAh8CZxYCAgEPFgIfAQUNUmVhbCBQcm9wZXJ0eWQCBw9kFggCAQ9kFgICAQ9kFgRmDw8WBh8BBRJTZWFyY2ggVW5hdmFpbGFibGUeB1Rvb2xUaXAFOFNlYXJjaCBpcyBjdXJyZW50bHkgdW5hdmFpbGFibGUsIHBsZWFzZSB0cnkgYWdhaW4gbGF0ZXIuHghSZWFkT25seWcWBB4Hb25mb2N1cwUxaWYodGhpcy52YWx1ZT09J0tleXdvcmQgb3IgU2VhcmNoJyl0aGlzLnZhbHVlPScnOx4Gb25ibHVyBTFpZih0aGlzLnZhbHVlPT0nJyl0aGlzLnZhbHVlPSdLZXl3b3JkIG9yIFNlYXJjaCc7ZAIBDw8WAh4HRW5hYmxlZGgWAh4Hb25jbGljawVoaWYoZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoJ2N0bDAwX2N0bDAwX3R4dEdvb2dsZUN1c3RvbVNlYXJjaCcpLnZhbHVlPT0nS2V5d29yZCBvciBTZWFyY2gnKXJldHVybiBmYWxzZTtkAgIPZBYEAgEPFgIfAQUMRmluYW5jZSBNZW51ZAIDDxQrAAIUKwACDxYGHgtfIURhdGFCb3VuZGceF0VuYWJsZUFqYXhTa2luUmVuZGVyaW5naB4MRGF0YVNvdXJjZUlEBRJTaXRlTWFwRGF0YVNvdXJjZTFkDxQrABMUKwACDxYIHwEFBEhvbWUeC05hdmlnYXRlVXJsBTlodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS9pbmRleC5waHAeBVZhbHVlBQRIb21lHwQFBEhvbWVkZBQrAAIPFggfAQUUQWNjb3VudGluZyAmIFBheXJvbGwfDQU7aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvYWNjb3VudC5waHAfDgUUQWNjb3VudGluZyAmIFBheXJvbGwfBAUUQWNjb3VudGluZyAmIFBheXJvbGxkZBQrAAIPFggfAQUcQnVkZ2V0ICYgTWFuYWdlbWVudCBSZXNlYXJjaB8NBThodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS9iYm1yLnBocB8OBRxCdWRnZXQgJiBNYW5hZ2VtZW50IFJlc2VhcmNoHwQFHEJ1ZGdldCAmIE1hbmFnZW1lbnQgUmVzZWFyY2hkZBQrAAIPFggfAQUJUHVyY2hhc2VzHw0FPWh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL3B1cmNoYXNlcy5waHAfDgUJUHVyY2hhc2VzHwQFCVB1cmNoYXNlc2RkFCsAAg8WCB8BBQ9SaXNrIE1hbmFnZW1lbnQfDQU8aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2Uvcmlza21nbXQucGhwHw4FD1Jpc2sgTWFuYWdlbWVudB8EBQ9SaXNrIE1hbmFnZW1lbnRkZBQrAAIPFggfAQUTVHJlYXN1cnkgTWFuYWdlbWVudB8NBTxodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS90cmVhc3VyeS5waHAfDgUTVHJlYXN1cnkgTWFuYWdlbWVudB8EBRNUcmVhc3VyeSBNYW5hZ2VtZW50ZGQUKwACDxYIHwEFE1JldmVudWUgQ29sbGVjdGlvbnMfDQU7aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvcmV2ZW51ZS5waHAfDgUTUmV2ZW51ZSBDb2xsZWN0aW9ucx8EBRNSZXZlbnVlIENvbGxlY3Rpb25zZGQUKwACDxYIHwEFE0RvY3VtZW50cyAmIFJlcG9ydHMfDQU4aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvZG9jcy5waHAfDgUTRG9jdW1lbnRzICYgUmVwb3J0cx8EBRNEb2N1bWVudHMgJiBSZXBvcnRzZGQUKwACDxYIHwEFD09ubGluZSBQYXltZW50cx8NBS1odHRwOi8vY2l0eXNlcnZpY2VzLmJhbHRpbW9yZWNpdHkuZ292L3BheXN5cy8fDgUPT25saW5lIFBheW1lbnRzHwQFD09ubGluZSBQYXltZW50c2RkFCsAAg8WCB8BBRM8aDI+RkFRIC8gSGVscDwvaDI+Hw0FDy9SZWFsUHJvcGVydHkvIx8OBRM8aDI+RkFRIC8gSGVscDwvaDI+HwRlZGQUKwACDxYIHwEFDVRheCBTYWxlIEZBUXMfDQUbaHR0cDovL3d3dy5iaWRiYWx0aW1vcmUuY29tHw4FDVRheCBTYWxlIEZBUXMfBAUNVGF4IFNhbGUgRkFRc2RkFCsAAg8WCB8BBRFQYXJraW5nIEZpbmVzIEZBUR8NBUFodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2Fuc3dlcnMvaW5kZXgucGhwP2FjdGlvbj1zaG93JmNhdD0xMB8OBRFQYXJraW5nIEZpbmVzIEZBUR8EBRFQYXJraW5nIEZpbmVzIEZBUWRkFCsAAg8WCB8BBRFSZWFsIFByb3BlcnR5IEZBUR8NBUFodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2Fuc3dlcnMvaW5kZXgucGhwP2FjdGlvbj1zaG93JmNhdD0xMh8OBRFSZWFsIFByb3BlcnR5IEZBUR8EBRFSZWFsIFByb3BlcnR5IEZBUWRkFCsAAg8WCB8BBRVQYXJraW5nIEZpbmVzIExpc3RpbmcfDQVraHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L3RyYW5zcG9ydGF0aW9uL2Rvd25sb2Fkcy8xMjA3LzEyMTkwNyBQYXJraW5nIEZpbmVzIExpc3RpbmcgMjAwNy5wZGYfDgUVUGFya2luZyBGaW5lcyBMaXN0aW5nHwQFFVBhcmtpbmcgRmluZXMgTGlzdGluZ2RkFCsAAg8WCB8BBRhBdm9pZGluZyBQYXJraW5nIFRpY2tldHMfDQVoaHR0
cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L3RyYW5zcG9ydGF0aW9uL2Rvd25sb2Fkcy8xMjA3LzEyMTkwNyBQYXJraW5nIFRpY2tldCBCcm9jaHVyZS5wZGYfDgUYQXZvaWRpbmcgUGFya2luZyBUaWNrZXRzHwQFGEF2b2lkaW5nIFBhcmtpbmcgVGlja2V0c2RkFCsAAg8WCB8BBRFUcmFuc2ZlciBUYXggVW5pdB8NBUFodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2Fuc3dlcnMvaW5kZXgucGhwP2FjdGlvbj1zaG93JmNhdD0xMR8OBRFUcmFuc2ZlciBUYXggVW5pdB8EBRFUcmFuc2ZlciBUYXggVW5pdGRkFCsAAg8WCB8BBQpMaWVucyBVbml0Hw0FPWh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL2ZhcXRsaWVucy5waHAfDgUKTGllbnMgVW5pdB8EBQpMaWVucyBVbml0ZGQUKwACDxYIHwEFF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5Hw0FX2h0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL2ltYWdlcy9MaWVuIENlcnRpZmljYXRlIHBvbGljeSBfMl8gT2N0IDIwMDgucGRmHw4FF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5HwQFF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5ZGQUKwACDxYIHwEFCENvbnRhY3RzHw1lHw4FCENvbnRhY3RzHwRlZGQPFCsBE2ZmZmZmZmZmZmZmZmZmZmZmZmYWAQVzVGVsZXJpay5XZWIuVUkuUmFkTWVudUl0ZW0sIFRlbGVyaWsuV2ViLlVJLCBWZXJzaW9uPTIwMDguMi44MjYuMjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49MTIxZmFlNzgxNjViYTNkNGQWJmYPDxYIHwEFBEhvbWUfDQU5aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvaW5kZXgucGhwHw4FBEhvbWUfBAUESG9tZWRkAgEPDxYIHwEFFEFjY291bnRpbmcgJiBQYXlyb2xsHw0FO2h0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL2FjY291bnQucGhwHw4FFEFjY291bnRpbmcgJiBQYXlyb2xsHwQFFEFjY291bnRpbmcgJiBQYXlyb2xsZGQCAg8PFggfAQUcQnVkZ2V0ICYgTWFuYWdlbWVudCBSZXNlYXJjaB8NBThodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS9iYm1yLnBocB8OBRxCdWRnZXQgJiBNYW5hZ2VtZW50IFJlc2VhcmNoHwQFHEJ1ZGdldCAmIE1hbmFnZW1lbnQgUmVzZWFyY2hkZAIDDw8WCB8BBQlQdXJjaGFzZXMfDQU9aHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvcHVyY2hhc2VzLnBocB8OBQlQdXJjaGFzZXMfBAUJUHVyY2hhc2VzZGQCBA8PFggfAQUPUmlzayBNYW5hZ2VtZW50Hw0FPGh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL3Jpc2ttZ210LnBocB8OBQ9SaXNrIE1hbmFnZW1lbnQfBAUPUmlzayBNYW5hZ2VtZW50ZGQCBQ8PFggfAQUTVHJlYXN1cnkgTWFuYWdlbWVudB8NBTxodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS90cmVhc3VyeS5waHAfDgUTVHJlYXN1cnkgTWFuYWdlbWVudB8EBRNUcmVhc3VyeSBNYW5hZ2VtZW50ZGQCBg8PFggfAQUTUmV2ZW51ZSBDb2xsZWN0aW9ucx8NBTtodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS9yZXZlbnVlLnBocB8OBRNSZXZlbnVlIENvbGxlY3Rpb25zHwQFE1JldmVudWUgQ29sbGVjdGlvbnNkZAIHDw8WCB8BBRNEb2N1bWVudHMgJiBSZXBvcnRzHw0FOGh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL2RvY3MucGhwHw4FE0RvY3VtZW50cyAmIFJlcG9ydHMfBAUTRG9jdW1lbnRzICYgUmVwb3J0c2RkAggPDxYIHwEFD09ubGluZSBQYXltZW50cx8NBS1odHRwOi8vY2l0eXNlcnZpY2VzLmJhbHRpbW9yZWNpdHkuZ292L3BheXN5cy8fDgUPT25saW5lIFBheW1lbnRzHwQFD09ubGluZSBQYXltZW50c2RkAgkPDxYIHwEFEzxoMj5GQVEgLyBIZWxwPC9oMj4fDQUPL1JlYWxQcm9wZXJ0eS8jHw4FEzxoMj5GQVEgLyBIZWxwPC9oMj4fBGVkZAIKDw8WCB8BBQ1UYXggU2FsZSBGQVFzHw0FG2h0dHA6Ly93d3cuYmlkYmFsdGltb3JlLmNvbR8OBQ1UYXggU2FsZSBGQVFzHwQFDVRheCBTYWxlIEZBUXNkZAILDw8WCB8BBRFQYXJraW5nIEZpbmVzIEZBUR8NBUFodHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2Fuc3dlcnMvaW5kZXgucGhwP2FjdGlvbj1zaG93JmNhdD0xMB8OBRFQYXJraW5nIEZpbmVzIEZBUR8EBRFQYXJraW5nIEZpbmVzIEZBUWRkAgwPDxYIHwEFEVJlYWwgUHJvcGVydHkgRkFRHw0FQWh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvYW5zd2Vycy9pbmRleC5waHA/YWN0aW9uPXNob3cmY2F0PTEyHw4FEVJlYWwgUHJvcGVydHkgRkFRHwQFEVJlYWwgUHJvcGVydHkgRkFRZGQCDQ8PFggfAQUVUGFya2luZyBGaW5lcyBMaXN0aW5nHw0Fa2h0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC90cmFuc3BvcnRhdGlvbi9kb3dubG9hZHMvMTIwNy8xMjE5MDcgUGFya2luZyBGaW5lcyBMaXN0aW5nIDIwMDcucGRmHw4FFVBhcmtpbmcgRmluZXMgTGlzdGluZx8EBRVQYXJraW5nIEZpbmVzIExpc3RpbmdkZAIODw8WCB8BBRhBdm9pZGluZyBQYXJraW5nIFRpY2tldHMfDQVoaHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmd
vdi9nb3Zlcm5tZW50L3RyYW5zcG9ydGF0aW9uL2Rvd25sb2Fkcy8xMjA3LzEyMTkwNyBQYXJraW5nIFRpY2tldCBCcm9jaHVyZS5wZGYfDgUYQXZvaWRpbmcgUGFya2luZyBUaWNrZXRzHwQFGEF2b2lkaW5nIFBhcmtpbmcgVGlja2V0c2RkAg8PDxYIHwEFEVRyYW5zZmVyIFRheCBVbml0Hw0FQWh0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvYW5zd2Vycy9pbmRleC5waHA/YWN0aW9uPXNob3cmY2F0PTExHw4FEVRyYW5zZmVyIFRheCBVbml0HwQFEVRyYW5zZmVyIFRheCBVbml0ZGQCEA8PFggfAQUKTGllbnMgVW5pdB8NBT1odHRwOi8vd3d3LmJhbHRpbW9yZWNpdHkuZ292L2dvdmVybm1lbnQvZmluYW5jZS9mYXF0bGllbnMucGhwHw4FCkxpZW5zIFVuaXQfBAUKTGllbnMgVW5pdGRkAhEPDxYIHwEFF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5Hw0FX2h0dHA6Ly93d3cuYmFsdGltb3JlY2l0eS5nb3YvZ292ZXJubWVudC9maW5hbmNlL2ltYWdlcy9MaWVuIENlcnRpZmljYXRlIHBvbGljeSBfMl8gT2N0IDIwMDgucGRmHw4FF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5HwQFF0xpZW4gQ2VydGlmaWNhdGUgUG9saWN5ZGQCEg8PFggfAQUIQ29udGFjdHMfDWUfDgUIQ29udGFjdHMfBGVkZAIFDxYCHwEFETxoMj5DT05UQUNUUzwvaDI+ZAIGDxYCHwEFqwM8ZGl2IHN0eWxlPSdwYWRkaW5nOjEwcHg7Jz48YSBocmVmPSdtYWlsdG86QmFsdGltb3JlQ2l0eUNvbGxlY3Rpb25zQGJhbHRpbW9yZWNpdHkuZ292Jz48c3Ryb25nPlJldmVudWUgQ29sbGVjdGlvbnM8L3N0cm9uZz48L2E+PGJyLz4yMDAgSG9sbGlkYXkgU3QuLCBSb29tIDc8YnIvPjxici8+PGEgaHJlZj0naHR0cDovL3d3dy5iYWx0aW1vcmVjaXR5Lmdvdi9nb3Zlcm5tZW50L2ZpbmFuY2UvcmV2ZW51ZS5waHAjY29udGFjdHMnPjxzdHJvbmc+QWxsICBDb250YWN0IE51bWJlcnM8L3N0cm9uZz48L2E+PGJyLz48YnIgLz4gPGJyLz48aDE+QWRtaW5pc3RyYXRpb248L2gxPiA8YnIvPjxzdHJvbmc+IEhlbnJ5IFJheW1vbmQgIDxiciAvPiA8L3N0cm9uZz48ZW0+Q2hpZWY8L2VtPjxiciAvPkJ1cmVhdSBvZiBSZXZlbnVlIENvbGxlY3Rpb25zPC9kaXY+ZAIJD2QWAgIBD2QWEAIBDxYCHgRocmVmBTZodHRwOi8vY2l0eXNlcnZpY2VzLmJhbHRpbW9yZWNpdHkuZ292L1NwZWNpYWxCZW5lZml0cy9kAgIPDxYIHwEFbVRoZSBFWEVDVVRFIHBlcm1pc3Npb24gd2FzIGRlbmllZCBvbiB0aGUgb2JqZWN0ICdBZGRMb2dFbnRyeScsIGRhdGFiYXNlICdGaW5hbmNlX1JlYWxQcm9wZXJ0eScsIHNjaGVtYSAnZGJvJy4fAmgeCUZvcmVDb2xvcgqNAR4EXyFTQgIEZGQCAw8PFgIfAmhkFghmDxBkDxYEZgIBAgICAxYEEAUJMjAxNS8yMDE2BQQyMDE2ZxAFCTIwMTQvMjAxNQUEMjAxNWcQBQkyMDEzLzIwMTQFBDIwMTRnEAUJMjAxMi8yMDEzBQQyMDEzZxYBZmQCAQ8PFgIfAQUEMDAyMWRkAgIPDxYCHwEFAzAyMmRkAgYPDxYCHwJoZGQCBA8PFgIfAmhkFgICAQ8WAh8BBYAJPG9sPjxsaT5UaGlzIHBhZ2UgaXMgZm9yIFJlYWwgUHJvcGVydHkgdGF4ZXMuICBVc2UgdGhpcyBsaW5rIGZvciA8YSBocmVmPScvU3BlY2lhbEJlbmVmaXRzLyc+U3BlY2lhbCBCZW5lZml0IERpc3RyaWN0IFN1cmNoYXJnZXM8L2E+LiANCjxsaT5JZiB5b3Uga25vdyB0aGUgQmxvY2sgJiBMb3QsIGVudGVyIG9ubHkgdGhlIGJsb2NrICYgbG90LiANCjxsaT5JZiB5b3UgYXJlIHNlYXJjaGluZyBieSBwcm9wZXJ0eSBhZGRyZXNzIG9yIG93bmVyIG5hbWUsIHlvdSBtYXkgZW50ZXIgYW55IHBvcnRpb24gb2YgZWl0aGVyIG9yIGJvdGggb2YgdGhvc2UgZmllbGRzLiAgV2hlbiB5b3UgZW50ZXIgZGF0YSBpbiBhIHNlYXJjaCBmaWVsZCwgdGhlIGRhdGEgeW91IGVudGVyZWQgaXMgbG9va2VkIGZvciBhbnl3aGVyZSB3aXRoaW4gdGhhdCBmaWVsZC4gRm9yIGV4YW1wbGUsIGlmIHlvdSBlbnRlciBCbHVlIGluIHRoZSBBZGRyZXNzIGZpZWxkLCB5b3Ugd2lsbCBnZXQgcmVzdWx0cyBpbmNsdWRpbmcgQmx1ZWJlcnJ5LCBCbHVlYm9ubmV0LCBUcnVlQmx1ZSwgZXRjLiANCjxsaT5EaXJlY3Rpb25zIHN1Y2ggYXMgTm9ydGgsIFNvdXRoLCBFYXN0LCBXZXN0IHNob3VsZCBiZSBlbnRlcmVkIGFzIE4sUyxFLFcgd2l0aCBubyBwZXJpb2QuIA0KPGxpPklmIHlvdXIgc2VhcmNoIGZhaWxzLCByZXRyeSB3aXRoIGxlc3MgaW5mb3JtYXRpb24gc3VjaCBhcywgRmlyc3QgU2VhcmNoOiBPd25lcj1Sb3NlbmJsYXR0LCByZXN1bHRzPTAgU2Vjb25kIFNlYXJjaDogT3duZXI9Um9zZW4gcmVzdWx0cz0xMjQgDQo8bGk+TGVhdmUgb2ZmIGFsbCBzdHJlZXQgc3VmZml4ZXMgc3VjaCBhcyBTdC4sV2F5LCBSb2FkIGV0Yy4gDQo8bGk+V2hlbiBzZWFyY2hpbmcgYnkgbmFtZSwgZW50ZXIgaW4gTGFzdE5hbWUsIEZpcnN0TmFtZSBmb3JtYXQuIA0KPGxpPklmIGFsbCB5b3VyIHNlYXJjaGVzIGFyZSB1bnN1Y2Nlc3NmdWwsIHBsZWFzZSBjb250YWN0IHRoZSBEZXB0LiBvZiBGaW5hbmNlIGF0IDQxMC0zOTYtMzk4Nw0KPGxpPjxzdHJvbmc+UmV0dXJuZWQgc2VhcmNoIHJlc3VsdHMgYXJlIGxpbWl0ZWQgdG8gNTAgcmVjb3Jkcy4gSWYgeW91IHJlYWNoIHRoaXMgbGltaXQsIHBsZWFzZSByZWZpbmUgeW91ciBzZWFyY2ggY3JpdGVyaWEuPHN0cm9uZz4NCjwvb2w+ZAIFDw8WAh8CZ2QWCgIBDw8WBB8CZx8BBTE8Yj5Dcml0ZX
JpYSBVc2VkOjwvYj5ZZWFyPTIwMTYgQmxvY2s9MDAyMSBMb3Q9MDIyZGQCAw8PFggfEAqQAR8BBRY8Yj5SZWNvcmRzIGZvdW5kOjwvYj4xHwJnHxECBGRkAgUPDxYEHwJnHwEFGTxiPlNvcnRlZCBCeTo8L2I+QmxvY2tMb3RkZAIHDw8WAh8CZ2RkAgkPPCsACwEADxYMHgtfIUl0ZW1Db3VudAIBHghEYXRhS2V5cxYBBQkwMDIxIDAyMiAeDERhdGFLZXlGaWVsZAUIYmxvY2tsb3QeCVBhZ2VDb3VudAIBHhVfIURhdGFTb3VyY2VJdGVtQ291bnQCAR4QQ3VycmVudFBhZ2VJbmRleGZkFgJmD2QWAgIBD2QWDGYPDxYCHwEFBTAwMjEgZGQCAQ8PFgIfAQUEMDIyIGRkAgIPDxYCHwEFITE3MDUgQkFLRVIgU1QgICAgICAgICAgICAgICAgICAgIGRkAgMPZBYIAgEPDxYCHwEFIURFTUFSLCBEQVZJRCAgICAgICAgICAgICAgICAgICAgIGRkAgMPDxYCHwEFITE3MDUgQkFLRVIgU1QgICAgICAgICAgICAgICAgICAgIGRkAgUPDxYCHwEFIUJBTFRJTU9SRSBNRCAyMTIxNy0xNjAyICAgICAgICAgIGRkAgcPDxYCHwEFISAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRkAgQPZBYCAgEPZBYCAgEPFQEhMTcwNSBCQUtFUiBTVCAgICAgICAgICAgICAgICAgICAgZAIFD2QWAgIBD2QWAgIBDxUBITE3MDUgQkFLRVIgU1QgICAgICAgICAgICAgICAgICAgIGQCBg9kFgQCDQ9kFgJmD2QWAmYPZBYCZg8PFgIfAmhkZAIVD2QWAgIBDw8WAh8BBS9QYXkgT25saW5lIHdpdGggQ3JlZGl0IENhcmQgb3IgQ2hlY2tpbmcgQWNjb3VudGRkAgcPZBYCAgcPZBYCAgEPPCsACwBkAggPDxYCHwJoZGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgIFHmN0bDAwJGN0bDAwJGltZ0J0bkdvb2dsZVNlYXJjaAUUY3RsMDAkY3RsMDAkUmFkTWVudTGVJdC2P+UeVXl5cdDYZzQRTCbPRg==',
    formdict['__VIEWSTATEGENERATOR'] = 'AE2FC7FE'
    formdict['__EVENTARGUMENT'] = ''
    formdict['ctl00_ctl00_RadMenu1_ClientState'] = ''
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnDdyear'] = '0'
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnSYear'] = '2016'
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnYear'] = '2016'
    formdict['ctl00$ctl00$txtGoogleCustomSearch'] = 'Search Unavailable'
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnOwner'] = ''
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnSOwner'] = ''
    
    
    # The site expects only the relevant search fields to be filled in; e.g., if block and lot are available, clear the Address field
    if block != '' and lot != '':
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnBlock'] = block
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnLot'] = lot
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnAddress'] = ''
    elif homeAddress != '':
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnAddress'] = homeAddress
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnBlock'] = ''
        formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnLot'] = ''
    
    # hdn and hdnS terms should match
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnSBlock'] = formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnBlock']
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnSLot'] = formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnLot']
    formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnSAddress'] = formdict['ctl00$ctl00$rootMasterContent$LocalContentPlaceHolder$hdnAddress']
    
    
    session = Session()
    
    # HEAD requests ask for *just* the headers, which is all you need to grab the
    # session cookie
    session.head('http://cityservices.baltimorecity.gov/realproperty')
    
    response = session.post('http://cityservices.baltimorecity.gov/realproperty/default.aspx',
        data = formdict
    )
    
    # Output rendered HTML page to file in current directory
    #with open('propinfo.html','wb') as f:
    #    f.write(response.text.encode('utf-8'))
            
    return response.text.encode('utf-8')
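A minimal usage sketch for the lookup above. The enclosing function's real name is defined earlier in the file; get_real_property_html below is a hypothetical stand-in, and the block/lot values match the form defaults baked into the snippet:

# Hypothetical wrapper name; the real function is defined above this excerpt.
html = get_real_property_html(block='0021', lot='022', homeAddress='')
with open('propinfo.html', 'wb') as f:
    f.write(html)  # the function returns UTF-8 encoded bytes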
Example #33
0
class SXCluster():
    """Makes requests to an SX cluster."""
    def __init__(self, host, port, is_secure, token, clustername):
        self.host = host
        self.port = port
        self.is_secure = is_secure
        self.session = Session()
        self.session.auth = SXAuth(token, clustername)

    def url(self, path, query):
        if self.is_secure:
            scheme = "https"
        else:
            scheme = "http"
        netloc = self.host
        if self.port is not None:
            netloc += ":%d" % self.port
        return urlunsplit((scheme, netloc, path, query, None))

    def check(self, r):
        if r.status_code == 502 or r.status_code == 504:
            raise SXException(r.text, 'Cannot connect to SX cluster')
        if not ('SX-Cluster' in r.headers):
            raise SXException(r.text, 'Not an SX cluster')
        if r.status_code != 200:
            try:
                info = r.json()
                if 'ErrorMessage' in info:
                    raise SXException(info['ErrorMessage'])
            except ValueError:
                pass
        return r

    # TODO: SSL cert verification
    def head(self, path, query=None):
        return self.session.head(self.url(path, query), verify=False)

    def get(self, path, query=None):
        r = self.session.get(self.url(path, query), verify=False)
        return self.check(r)

    def put(self, path, query=None, data=None,
            content_type='application/octet-stream'):
        r = self.session.put(self.url(path, query), data, verify=False,
                             headers={'Content-Type': content_type})
        return self.check(r)

    def delete(self, path, query=None):
        r = self.session.delete(self.url(path, query), verify=False)
        return self.check(r)

    def job_put(self, path, payload, desc):
        r = self.put(path, data=json.dumps(payload),
                     content_type='application/json')
        return SXJob(self, r, desc)

    def job_delete(self, path, payload, desc):
        r = self.delete(path)
        return SXJob(self, r, desc)

    def close(self):
        self.session.close()
        self.session = None
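A rough sketch of how this client might be used, assuming SXAuth, SXException and SXJob are defined elsewhere in the same module; the host, token and cluster name below are placeholders:

# Placeholder credentials; real values come from the SX cluster configuration.
cluster = SXCluster(host='sx.example.com', port=443, is_secure=True,
                    token='BASE64-AUTH-TOKEN', clustername='mycluster')
try:
    r = cluster.get('/')   # check() raises SXException if this is not an SX cluster
    print(r.headers.get('SX-Cluster'))
finally:
    cluster.close()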
Example #34
0
from requests import Session

session = Session()

# HEAD requests ask for *just* the headers, which is all you need to grab the
# session cookie
session.head('https://catalog.paytm.com')

response = session.get(
    url='https://catalog.paytm.com/v1/g/recharge-plans/mobile/2g-data?operator=BSNL&circle=Karnataka&type=mobile&description=1&page_count=1&items_per_page=30&sort_price=0&callback=angular.callbacks._9',
    headers={
        'Referer': 'https://paytm.com/'
    }
)

print(response.text)

The raw request sent by the browser for this endpoint:

GET  HTTP/1.1
Host: catalog.paytm.com
Connection: keep-alive
Accept: */*
User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36
Referer: https://paytm.com/
Accept-Encoding: gzip, deflate, sdch
Accept-Language: en-US,en;q=0.8
Example #35
0
from requests import Session

session = Session()

# HEAD requests ask for *just* the headers, which is all you need to grab the
# session cookie
session.head('http://www.gatesfoundation.org/How-We-Work/Quick-Links/Grants-Database')

response = session.post(
    url='http://www.gatesfoundation.org/services/gfo/search.ashx',
    data={
        'N': '4294966750',
        'form-trigger': 'moreId',
        'moreId': '156#327',
        'pageType': 'EventClass'
    },
    headers={
        'Referer': 'http://www.gatesfoundation.org/How-We-Work/Quick-Links/Grants-Database'
    }
)

print(response.text)
Example #36
0
class Rest(Scraper):
    def __init__(self, url, method='GET', id=None, headers=None):
        Scraper.__init__(self, url, id=id)
        self.setMethod(method)
        self.setParams({})
        self.session = Session()

        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
        }

        if headers is not None:
            self.updateHeaders(headers)

    def setMethod(self, method):
        # Reject anything that is not one of the four supported method strings
        if type(method) is not str or method not in ('GET', 'POST',
                                                     'GET_JSON', 'POST_JSON'):
            raise Exception(
                "Method must be a string GET/POST/GET_JSON/POST_JSON")

        self.method = method

    def updateHeaders(self, headers):
        if type(headers) is not dict:
            raise Exception("Headers must be a dict.")

        self.headers.update(headers)

    def setParams(self, params):
        if type(params) is not dict:
            raise Exception("Params must be a dict.")

        self.params = params

    def updateParams(self, params):
        if type(params) is not dict:
            raise Exception("Params must be a dict.")

        self.params.update(params)

    def getParams(self):
        return self.params

    def getHeaders(self):
        return self.headers

    def getMethod(self):
        return self.method

    def execute(self):
        kwargs = {}
        args = []

        if self.getMethod() == "GET" or self.getMethod() == "POST":
            args.append(self.getMethod())
            kwargs['data'] = self.getParams()
        elif self.getMethod() == "GET_JSON":
            args.append("GET")
            kwargs['json'] = self.getParams()
        elif self.getMethod() == "POST_JSON":
            args.append("POST")
            kwargs['json'] = self.getParams()

        args.append(self.url)
        kwargs['headers'] = self.headers

        req = Request(*args, **kwargs)

        # A preliminary HEAD request fetches the Content-Type without downloading the body
        response = self.session.head(self.url)
        content_type = response.headers['content-type']

        prepared = self.session.prepare_request(req)
        print('Request url -> {}'.format(req.url))
        print('Request headers : {}'.format(req.headers))
        print('Request body : {}'.format(prepared.body))
        res = self.session.send(prepared)

        return (content_type, res)
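A short usage sketch, assuming the Scraper base class (not shown here) simply stores the url and id it is given; the endpoint is a placeholder:

# Hypothetical endpoint; Scraper.__init__ is assumed to only store its arguments.
scraper = Rest('https://httpbin.org/post', method='POST_JSON')
scraper.updateParams({'query': 'test'})
content_type, res = scraper.execute()
print(content_type, res.status_code)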
Example #37
0
class Server(object):
    def __init__(self, host="http://localhost:5984", auth=None,
                 trust_env=False):
        self.host = host
        self.session = Session()
        # trust_env triggers netrc lookups (get_netrc), which are very slow
        self.session.trust_env = trust_env
        self.session.auth = auth
        self.session.headers = {
                "Content-Type": "application/json",
                }

    def __getitem__(self, name):
        return Database(name, server=self, create=False)

    def __len__(self):
        return len(self.get_databases())

    def __nonzero__(self):
        """
        Returns True if the server is available
        """

        try:
            self.session.head(self.host)
            return True
        except Exception:
            return False

    def __delitem__(self, name):
        self.delete_db(name)

    def __contains__(self, db_or_name):
        """
        Tests if the database exists
        """

        name = db_or_name
        if isinstance(db_or_name, Database):
            name = db_or_name.name

        request = self.session.head(self.host + "/" + name)
        if request.status_code == 404:
            return False
        return True

    def __iter__(self):
        """
        Iterates over all the databases and returns Database instances
        """

        return (Database(name, server=self) for name in self.get_databases())

    def uuids(self, count=1):
        """
        Returns a list of "count" uuids generated by the server
        """

        request = self.session.get(self.host + "/_uuids",
                params={"count": count})
        return request.json()["uuids"]

    def get_databases(self):
        request = self.session.get(self.host + "/_all_dbs")
        return request.json()

    def version(self):
        request = self.session.get(self.host)
        return request.json()["version"]

    def create_db(self, name):
        """
        Try to create a new database or raise error

        Possible Errors: DBExists, AuthFail
        """

        return Database(name, server=self, create=True)

    def delete_db(self, db_or_name):
        """
        Try to delete database or raise error

        Possible Errors: DBNotExists, AuthFail
        """

        name = db_or_name
        if isinstance(db_or_name, Database):
            name = db_or_name.name

        request = self.session.delete(self.host + "/" + name)
        if not request.ok:
            if request.status_code == 401:
                raise excepts.AuthFail
            elif request.status_code == 404:
                raise excepts.DBNotExists
            raise Exception(request.status_code)
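A brief usage sketch for this CouchDB-style wrapper, assuming a local CouchDB instance and the Database class defined alongside Server; the credentials are placeholders:

# Placeholder credentials; Database is assumed to be defined in the same module.
server = Server('http://localhost:5984', auth=('admin', 'secret'))
print(server.version())
print(server.uuids(count=3))
if 'mydb' not in server:        # __contains__ issues a HEAD to /mydb
    db = server.create_db('mydb')
print(len(server))              # number of databases, via GET /_all_dbs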
Example #38
0
class DefaultClient(BaseClient):
    """Session based HTTP (default) client for ArangoDB."""

    def __init__(self, init_data):
        """Initialize the session with the credentials.

        :param init_data: data for client initialization
        :type init_data: dict
        """
        self.session = Session()
        self.session.auth = init_data["auth"]

    def head(self, url, params=None, headers=None, auth=None):
        """HTTP HEAD method.

        :param url: request URL
        :type url: str
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.head(
            url=url,
            params=params,
            headers=headers,
        )
        return Response(
            method="head",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def get(self, url, params=None, headers=None, auth=None):
        """HTTP GET method.

        :param url: request URL
        :type url: str
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.get(
            url=url,
            params=params,
            headers=headers,
        )
        return Response(
            method="get",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def put(self, url, data=None, params=None, headers=None, auth=None):
        """HTTP PUT method.

        :param url: request URL
        :type url: str
        :param data: request payload
        :type data: str or dict or None
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.put(
            url=url,
            data=data,
            params=params,
            headers=headers,
        )
        return Response(
            method="put",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def post(self, url, data=None, params=None, headers=None, auth=None):
        """HTTP POST method.

        :param url: request URL
        :type url: str
        :param data: request payload
        :type data: str or dict or None
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.post(
            url=url,
            data="" if data is None else data,
            params={} if params is None else params,
            headers={} if headers is None else headers,
        )
        return Response(
            method="post",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def patch(self, url, data=None, params=None, headers=None, auth=None):
        """HTTP PATCH method.

        :param url: request URL
        :type url: str
        :param data: request payload
        :type data: str or dict or None
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.patch(
            url=url,
            data=data,
            params=params,
            headers=headers,
        )
        return Response(
            method="patch",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def delete(self, url, params=None, headers=None, auth=None):
        """HTTP DELETE method.

        :param url: request URL
        :type url: str
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.delete(
            url=url,
            params=params,
            headers=headers,
            auth=auth,
        )
        return Response(
            method="delete",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def options(self, url, data=None, params=None, headers=None, auth=None):
        """HTTP OPTIONS method.

        :param url: request URL
        :type url: str
        :param data: request payload
        :type data: str or dict or None
        :param params: request parameters
        :type params: dict or None
        :param headers: request headers
        :type headers: dict or None
        :param auth: username and password tuple
        :type auth: tuple or None
        :returns: ArangoDB http response object
        :rtype: arango.response.Response
        """
        res = self.session.options(
            url=url,
            data="" if data is None else data,
            params={} if params is None else params,
            headers={} if headers is None else headers,
        )
        return Response(
            method="options",
            url=url,
            headers=res.headers,
            status_code=res.status_code,
            content=res.text,
            status_text=res.reason
        )

    def close(self):
        """Close the HTTP session."""
        self.session.close()
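A minimal sketch of driving this client against a local ArangoDB instance, assuming BaseClient and arango.response.Response are importable from the surrounding package; the URL and credentials are placeholders:

# Placeholder URL and credentials for a local ArangoDB server.
client = DefaultClient({'auth': ('root', 'passwd')})
res = client.get('http://localhost:8529/_api/version')
print(res.status_code, res.content)
client.close()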