def test_put(self):
    g = Grab()
    g.setup(post='abc', url=SERVER.BASE_URL, method='put', debug=True)
    SERVER.REQUEST['debug'] = True
    g.request()
    self.assertEqual(SERVER.REQUEST['method'], 'PUT')
    self.assertEqual(SERVER.REQUEST['headers']['content-length'], '3')
def genKey(self):
    self.login = self.lineEdit_login.text()
    self.login = str(self.login)
    self.login = self.login.strip()
    self.login = QtCore.QString(self.login)
    self.hash_key = hashlib.md5(self.login).hexdigest()
    datetime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    grab = Grab(timeout=50, connect_timeout=25,
                url='http://sysadmin.1cps.ru/alisa/activate.php')
    grab.setup(post={
        'user': self.login,
        'key': self.hash_key,
        'datetime': datetime,
        'KeyGen': True
    })
    grab.request()
    self.lineEdit_key.setText(self.hash_key)
    # Copy the generated key to the clipboard
    cb = QtGui.QApplication.clipboard()
    cb.clear(mode=cb.Clipboard)
    cb.setText(self.hash_key, mode=cb.Clipboard)
def request(self, url, post_data):
    g = Grab(connect_timeout=10)
    g.setup(url=url)
    if post_data:
        g.setup(post=post_data)
    g.request()
    return {'code': g.doc.code, 'body': g.doc.body, 'url': url}
def get_avito_phone_image(url, ref_url):
    g = Grab()
    g.setup(headers={'X-Requested-With': 'XMLHttpRequest', 'Referer': ref_url},
            url=url)
    g.request()
    m = strip_image.search(str(g.doc.body))
    image = Image.open(BytesIO(base64.b64decode(m.group(1))))
    return image
class RSS:
    def __init__(self, site, url):
        self.site = site
        self.grab = Grab()
        self.grab.setup(headers={'Accept-Charset': 'utf-8'}, url=url)

    def project_generator(self):
        self.grab.request()
        tree = fromstring(self.grab.response.body)
        items = tree.xpath('/rss/channel/item/*')
        items = map(lambda e: e.text.encode('utf-8'), items)
        items = zip(*[items[i::6] for i in range(6)])
        for item in items:
            project = {
                'title': item[0],
                'link': item[1],
                'description': item[2],
                'category': item[4],
                'date': item[5],
            }
            yield project

    def update(self):
        for item in self.project_generator():
            if model.Project.query.filter_by(url=item['link']).first():
                continue
            category = self.get_category(item['category'])
            model.Project(name=item['title'],
                          url=item['link'],
                          description=item['description'],
                          category=category,
                          date=datetime.datetime.strptime(
                              item['date'], "%a, %d %b %Y %H:%M:%S %Z"),
                          site=self.site)
            session.commit()

    def get_category(self, path):
        categories = path.split(' / ')
        categories = map(lambda s: s.strip(), categories)
        categories.reverse()
        category = None
        while len(categories):
            category = model.Category.query.filter_by(name=categories.pop(),
                                                      parent=category,
                                                      site=self.site)
            category = category.first()
        return category
def test_multipart_post(self):
    g = Grab(url=SERVER.BASE_URL, debug_post=True, transport=GRAB_TRANSPORT)

    # Dict
    g.setup(multipart_post={"foo": "bar"})
    g.request()
    self.assertTrue(b'name="foo"' in SERVER.REQUEST["post"])

    # A few values with non-ascii data
    # TODO: understand and fix
    # AssertionError: 'foo=bar&gaz=%D0%94%D0%B5%D0%BB%D1%8C%D1%84%D0%B8%D0%BD&abc=' != 'foo=bar&gaz=\xd0\x94\xd0\xb5\xd0\xbb\xd1\x8c\xd1\x84\xd0\xb8\xd0\xbd&abc='
    # g.setup(post=({'foo': 'bar', 'gaz': u'Дельфин', 'abc': None}))
    # g.request()
    # self.assertEqual(SERVER.REQUEST['post'], 'foo=bar&gaz=Дельфин&abc=')

    # Multipart data cannot be a string
    g.setup(multipart_post="asdf")
    self.assertRaises(GrabMisuseError, lambda: g.request())

    # Tuple with one pair
    g.setup(multipart_post=(("foo", "bar"),))
    g.request()
    self.assertTrue(b'name="foo"' in SERVER.REQUEST["post"])

    # Tuple with two pairs
    g.setup(multipart_post=(("foo", "bar"), ("foo", "baz")))
    g.request()
    self.assertTrue(b'name="foo"' in SERVER.REQUEST["post"])
def get_categories():
    grab = Grab()
    grab.setup(url='http://www.free-lance.ru/freelancers/')
    print u'Requesting the page'
    grab.request()
    print u'Extracting categories'
    categories = grab.xpath_list('//ul[@id="accordion"]/li[not(@class)]')
    for category in categories:
        subcategories = category.xpath('./ul[@class="element"]/li/span/a')
        subcategories = map(lambda a: a.text_content().encode('utf-8'),
                            subcategories)
        yield (category.xpath('./a')[0].text_content().encode('utf-8'),
               subcategories)
    print u'Done'
def get_actual_price(self, item_class, output_queue):
    try:
        url_price = "http://steamcommunity.com/market/itemordershistogram?" \
                    "country=RU&language=russian&currency=5&" \
                    "item_nameid=%s&two_factor=0" % item_class.get_item_nameid()
        g = Grab(log_file='log.html')
        g.setup(url=url_price)
        time.sleep(random.uniform(0, 2))
        g.request()
        response = json.loads(g.response.body)
        lowest_sell_price = float(response['lowest_sell_order']) / 100
        highest_buy_order = float(response['highest_buy_order']) / 100
        item_class.set_lowest_sell_price_in_market(lowest_sell_price)
        item_class.set_highest_buy_order_in_market(highest_buy_order)
    except Exception as inst:
        # print type(inst)
        # print inst
        return inst
    if item_class.get_profit() > 0:
        output_queue.put(item_class)
class WebClient:
    def __init__(self):
        self.__client = Grab()
        self.__target = None
        self.__client_header = {}

    def set_target(self, host, port=80, timeout=DEFAULT_TIMEOUT):
        self.__target = urllib.parse.urlparse(host)
        if self.__target.netloc == '' or self.__target.scheme == '':
            self.__target = urllib.parse.urlparse("http://" + host)
        self.__client.setup(url=urllib.parse.urlunparse(self.__target))

    def set_proxy(self, host, port=80):
        self.__client.setup(proxy="%s:%s" % (host, port))

    def __put_def_header(self):
        pass
        # self.__client.putheader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:38.0)")
        # self.__client.putheader('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
        # self.__client.putheader('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3')
        # self.__client.putheader('Accept-Encoding', 'text, deflate')
        # self.__client.putheader('Connection', 'keep-alive')
        # self.__client.putheader('Referer', 'http://{0}/'.format(self.__target.netloc))
        # self.__client.putheader('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
        # self.__client.putheader('DNT', '1')

    def __put_client_header(self):
        self.__client.setup(headers=self.__client_header)

    def putheader(self, header, value):
        assert isinstance(header, str)
        assert isinstance(value, str)
        self.__client_header[header] = value

    def start_request(self, method="GET", data=None):
        """
        :param method:
        :param data: data to send to the server.
        :return: str
        """
        raw_data = None
        try:
            self.__client.setup(post=data)
            answer = self.__client.request()
            if answer.code != 200:
                logger.warning(
                    "failed to get response from server! error: {0}\n".format(
                        http.client.responses[answer.code]))
            raw_data = answer.unicode_body()
        except Exception as err:
            logger.log(logger.DETAIL, str(err) + " when request to {0}\n".format(
                self.__target.netloc + self.__target.path))
            # When an error happens it often means there is still a connection,
            # so just ignore it.
        return raw_data
def get_username(url=None):
    G = Grab()
    u = G.request(url=url)
    name = u.select('//td/a[@rel="nofollow"]/@href').node().strip('/')
    text = u.select('//td/a[@rel="nofollow"]/text()').node().lower()
    if name == text:
        return name
    elif name and not text:
        return name
    elif text and not name:
        return text
    return None
def test_unicode_post(self):
    # By default, unicode post should be converted into utf-8
    g = Grab()
    data = u"фыва"
    g.setup(post=data, url=SERVER.BASE_URL)
    g.request()
    self.assertEqual(SERVER.REQUEST["post"], data.encode("utf-8"))

    # Now try cp1251 with charset option
    SERVER.REQUEST["charset"] = "cp1251"
    g = Grab()
    data = u"фыва"
    g.setup(post=data, url=SERVER.BASE_URL, charset="cp1251", debug=True)
    g.request()
    self.assertEqual(SERVER.REQUEST["post"], data.encode("cp1251"))

    # Now try dict with unicode value & charset option
    SERVER.REQUEST["charset"] = "cp1251"
    g = Grab()
    data = u"фыва"
    g.setup(post={"foo": data}, url=SERVER.BASE_URL, charset="cp1251", debug=True)
    g.request()
    test = "foo=%s" % quote(data.encode("cp1251"))
    test = test.encode("utf-8")  # py3 hack
    self.assertEqual(SERVER.REQUEST["post"], test)
def checkAuthorization(login, password):
    '''Check that the login credentials are valid'''
    print('Checking authorization')
    key = '4109294306'
    # First create a Grab object
    grab = Grab(
        timeout=50,
        connect_timeout=25,
        url='https://www.bridge-of-love.com/login.html',
        debug_post=True,
        log_dir='log'
    )
    # Then log in on the site
    # grab.setup(post={'user_name': login.toUtf8(), 'password': password.toUtf8(), 'remember': 'on', 'ret_url': ''})
    grab.setup(post={'key': key,
                     'user_name': login,
                     'password': password,
                     'remember': 'on',
                     'ret_url': ''})
    # Submit the form
    grab.request()
    print(dir(grab.doc))
    # Check whether authorization succeeded
    if not grab.doc.text_search(unicode(login)):
        print('Authorization failed!')
        msg = u'Wrong login or password. Check everything again and try once more!'
        print(grab.doc.select('//title').text())
    else:
        print('Success')
        print(grab.doc.select('//title').text())
def get_username(self):
    G = Grab()
    u = G.request(url=self.url)
    name = u.select('//td/a[@rel="nofollow"]/@href').node().strip('/')
    text = u.select('//td/a[@rel="nofollow"]/text()').node().lower()
    if name == text:
        self.uploader = name
        return self.uploader
    elif name and not text:
        self.uploader = name
        return self.uploader
    elif text and not name:
        self.uploader = text
        return self.uploader
    return None
def get_html(URL):
    # Fetch the page
    g = Grab(
        url=URL,
        user_agent="Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11",
        timeout=8)
    try:
        response = g.request()
        time.sleep(2)
        # response = urllib.request.urlopen(URL, timeout=8)
        logging.info('got some kind of response')
        # return response.read()
        return response.unicode_body()
    except:
        logging.info('The server did not answer within 8 seconds, trying again!')
        return get_html(URL)
def get_html(URL):
    # Fetch the page
    g = Grab(
        url=URL,
        user_agent="Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/2.0.0.11",
        timeout=8)
    try:
        response = g.request()
        # response = urllib.request.urlopen(URL, timeout=8)
        logging.info('requested the listings page and built the soup')
        # return response.read()
        time.sleep(2)
        return response.unicode_body()
    except:
        logging.warning('the server did not answer the listings request in time, trying again!')
        return get_html(URL)
def get_info(self):
    from grab import Grab
    G = Grab()
    u = G.request(url=self.url)
    re_likes = u.select('//span[@class="percent-likes"]').text('0%')
    re_comments = u.select('//a[@class="comments-btn"]').text('comments (0)')
    try:
        likes = re.search('(\d\d\d|\d\d|\d)%', re_likes).group(1)
    except AttributeError:
        likes = ''
    try:
        comments = re.search('\s?comments\s?\((\d\d\d\d|\d\d\d|\d\d|\d)\)',
                             re_comments).group(1)
    except AttributeError:
        comments = ''
    tags = '/'.join(
        u.select('//td[preceding-sibling::td="TAGS"]/a').text_list([])).lower()
    categories = '/'.join(
        u.select('//td[preceding-sibling::td="FROM"][3]/*').text_list([])).lower()
    stars = '/'.join(
        u.select('//td[preceding-sibling::td="PORNSTARS"]').text_list([])).lower()
    username = u.select('//td/a[@rel="nofollow"]/@href').node().strip('/') \
        or u.select('//td/a[@rel="nofollow"]/text()').node().lower() or ''
    self.info = dict(likes=likes,
                     tags=tags,
                     categories=categories,
                     comments=comments,
                     uploader=username,
                     stars=stars)
    if username:
        self.uploader = username
    for k, v in self.info.items():
        self.__setattr__(k, v)
def test_unicode_post(self):
    # By default, unicode post should be converted into utf-8
    g = Grab()
    data = u'фыва'
    g.setup(post=data, url=SERVER.BASE_URL)
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], data.encode('utf-8'))

    # Now try cp1251 with charset option
    SERVER.REQUEST['charset'] = 'cp1251'
    g = Grab()
    data = u'фыва'
    g.setup(post=data, url=SERVER.BASE_URL, charset='cp1251', debug=True)
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], data.encode('cp1251'))

    # Now try dict with unicode value & charset option
    SERVER.REQUEST['charset'] = 'cp1251'
    g = Grab()
    data = u'фыва'
    g.setup(post={'foo': data}, url=SERVER.BASE_URL, charset='cp1251', debug=True)
    g.request()
    self.assertEqual(SERVER.REQUEST['post'],
                     'foo=%s' % urllib.quote(data.encode('cp1251')))
#!/usr/bin/python
# -*- coding: utf-8 -*-
from grab import Grab
import string

g = Grab(log_file='out.html')
g.go('http://www.investing.com/commodities/gold-technical')
resp = g.response.body
# print resp
print len(resp)

g.request(log_file='out.htm2')
print len(g.response.body)

search_area = g.xpath_text('//div[@class="techStudiesTabInnerWrap"]')
t = search_area.split()
result = (t[0].split(':'))[1]
i = 1
while t[i] != 'Moving':
    result += ' ' + t[i]
    i += 1
print result
print t
# print string.find(search_area, 'STRONG SELL')
import setup_script
from grab import Grab

g = Grab()
g.setup(url='http://ya.ru/')

g.setup(interface='tun0')
g.request()
print g.doc.select('//title').text()

g.setup(interface='foo')
try:
    g.request()
except Exception, ex:
    print ex
else:
    print g.doc.select('//title').text()

g.setup(interface='192.168.170.18')
g.request()
print g.doc.select('//title').text()
def test_patch(self):
    g = Grab()
    g.setup(post='abc', url=SERVER.BASE_URL, method='patch')
    g.request()
    self.assertEqual(SERVER.REQUEST['method'], 'PATCH')
    self.assertEqual(SERVER.REQUEST['headers']['content-length'], '3')
from grab import Grab, UploadFile
import logging

logging.basicConfig(level=logging.DEBUG)

g = Grab()
g.setup(log_dir='log/grab')
g.go('https://afisha.tut.by/film/', log_file='out.html')
g.setup(post={'hi': u'Превед, яндекс!'})
g.request()
def button_clicked():
    global word  # 'word' is assumed to be a module-level Tk window
    word.destroy()
    word = Tk()
    word.title = ''
    word.state = 'iconic'
    # word.iconify()
    # word.withdraw()
    mword = word.selection_get()
    g = Grab()
    g.request(
        log_file="/tmp/pygdic.log",
        url="http://translate.google.com/translate_t?hl=ru&langpair=auto|ru&text=" + mword)
    autp = g.css_text('span#result_box')
    trancecript = g.css_text('div#src-translit')
    outplen = len(autp) / dlstr
    ostr = ''
    for i in xrange(outplen + 1):
        ostr += autp[dlstr * i:dlstr * (i + 1)] + "\n"
    label = Label(word,
                  width=dlstr + 2,
                  text=ostr + "\n" + trancecript,
                  font="Arial 14",
                  bg="#ffffaa",
                  fg="blue")
    label.pack(expand=True)
def test_post(self):
    g = Grab(url=SERVER.BASE_URL, debug_post=True, transport=GRAB_TRANSPORT)

    # Provide POST data in dict
    g.setup(post={'foo': 'bar'})
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'foo=bar')

    # Provide POST data in tuple
    g.setup(post=(('foo', 'TUPLE'),))
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'foo=TUPLE')

    # Provide POST data in list
    g.setup(post=[('foo', 'LIST')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'foo=LIST')

    # Order of elements should not be changed (1)
    g.setup(post=[('foo', 'LIST'), ('bar', 'BAR')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'foo=LIST&bar=BAR')

    # Order of elements should not be changed (2)
    g.setup(post=[('bar', 'BAR'), ('foo', 'LIST')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'bar=BAR&foo=LIST')

    # Provide POST data in byte-string
    g.setup(post='Hello world!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'Hello world!')

    # Provide POST data in unicode-string
    g.setup(post=u'Hello world!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'Hello world!')

    # Provide POST data in non-ascii unicode-string
    g.setup(post=u'Привет, мир!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], u'Привет, мир!'.encode('utf-8'))

    # Two values with one key
    g.setup(post=(('foo', 'bar'), ('foo', 'baz')))
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], b'foo=bar&foo=baz')
class FanduelApiProvider:
    def __init__(self):
        default_logging()
        logDir = '/tmp/fanduel'
        if not os.path.exists(logDir):
            os.makedirs(logDir)
        self.grab = Grab(log_dir=logDir, debug_post=True)

    def auth(self, email):
        cookiefile = "%s.cookie" % (re.sub('[!@#$.]', '', email))
        if not os.path.isfile(cookiefile):
            open(cookiefile, 'a+').close()
            self.grab.setup(cookiefile=cookiefile)
            print "real authorization"
            self.grab.go(LOGIN_PAGE)
            self.grab.set_input('email', email)
            self.grab.set_input('password', getPassword())
            self.grab.submit()
        else:
            print "use cookie"
            self.grab.setup(cookiefile=cookiefile)
            self.grab.go(MAIN_PAGE)
        return self.grab.response.code

    def getContests(self):
        response = self.grab.response.body
        rawJsonDataBegin = response.find('LobbyConnection.initialData') + \
            len('LobbyConnection.initialData = ')
        rawJsonDataEnd = response.rfind(
            ';', rawJsonDataBegin, response.find('LobbyConnection.lastUpdate'))
        rawJsonInitData = response[rawJsonDataBegin:rawJsonDataEnd]
        jsonInitData = json.loads(rawJsonInitData)
        return jsonInitData['additions']

    def openContest(self, contest):
        self.grab.go(contest.url())

    def getPlayerData(listOfPlayers):
        jsonPlayers = []
        for p in listOfPlayers:
            jsonPlayer = [p.position(), p.id(), p.fixture(), p.teamId(), 'false']
            jsonPlayers.append(json.dumps(jsonPlayer))
        playerData = json.dumps(jsonPlayers)
        return playerData

    def getAttr(self, path, attr, selector=None):
        if not selector:
            selector = self.grab.doc
        return selector.select(path).attr(attr)

    def joinContest(self, contest, playerData):
        self.openContest(contest)
        getValue = lambda name: self.grab.doc.select(
            "//form/input[@name='%s']" % (name)).attr('value')
        postReq = {}
        postReq['cc_session_id'] = getValue('cc_session_id')
        postReq['cc_action'] = 'cca_jointable'
        postReq['cc_failure_url'] = getValue('cc_failure_url')
        postReq['game_id'] = getValue('game_id')
        postReq['playerData'] = playerData
        postReq['table_id'] = str(contest.uniqueId())
        postReq['tablespec_id'] = ''
        postReq['is_public'] = getValue('is_public')
        postReq['currencytype'] = getValue('currencytype')
        print json.dumps(postReq, indent=2)
        self.grab.setup(multipart_post=postReq)
        # {
        #     'cc_session_id': getValue('cc_session_id'),
        #     'cc_action': 'cca_jointable',
        #     'cc_failure_url': getValue('cc_failure_url'),
        #     'game_id': getValue('game_id'), 'playerData': playerData,
        #     'table_id': str(contest.uniqueId()), 'tablespec_id': str(contest.tableSpecId()),
        #     'is_public': '1', 'currencytype': '1'
        # })
        self.grab.request()
        print self.grab.doc.select('//head/title').text()
        return self.grab.response.code

    def getPlayers(self, contest):
        self.openContest(contest)
        response = self.grab.response.body
        rawJsonDataBegin = response.find('FD.playerpicker.allPlayersFullData') + \
            len('FD.playerpicker.allPlayersFullData = ')
        rawJsonDataEnd = response.rfind(
            ';', rawJsonDataBegin,
            response.find('FD.playerpicker.teamIdToFixtureCompactString'))
        rawJsonData = response[rawJsonDataBegin:rawJsonDataEnd]
        tmp = json.loads(rawJsonData)
        return tmp
class ParserChsu():
    def __init__(self, group, semestr):
        self.week = 0
        self.urlStart = 'http://rasp.chsu.ru/'
        self.group = group
        self.semestr = semestr
        self.grab = Grab()

    def getWeek(self):
        """ Get the current week number from the site """
        logging.debug("Getting the week")
        grab = Grab()
        grab.go(self.urlStart)
        self.week = grab.doc.select('//div[@id="Layer2"]//font[@size="5"]').text()
        logging.info("Week received: %s" % self.week)

    def setGroupAndSemestr(self):
        """ Send a POST request that selects the group and semester """
        self.grab.go(self.urlStart)
        self.grab.setup(post={'gr': self.group, 'ss': self.semestr})
        self.grab.request()

    def _parsingPeriod(self, period):
        """ Parse the weeks string and return the two boundary values """
        periodTo = period[2:4]
        periodDo = period[7:]
        return periodTo, periodDo

    def getRasp(self):
        """ Collect the whole timetable """
        self.ListRasp = []
        for elem in self.grab.doc.select('//div[@align="center"]//table/tr'):
            day = elem.select('./td')[0].text()
            LessonsTime = elem.select('.//td')[2].text()
            LessonsName = elem.select('.//td')[4].text()
            Period = elem.select('.//td')[6].text()
            Chet = elem.select('.//td')[8].text()
            Prepod = elem.select('.//td')[10].text()
            Locate = elem.select('.//td')[12].text()
            # Convert the period value into its two boundaries
            periodTo, periodDo = self._parsingPeriod(Period)
            if day != "" and day != u"День":
                tmp = (day, LessonsTime, LessonsName, periodTo, periodDo,
                       Chet, Prepod, Locate)
                self.ListRasp.append(tmp)
        logging.info("Subjects found: %i" % len(self.ListRasp))

    def getChet(self, week):
        if week % 2 == 0:
            return u"чет"
        else:
            return u"нечет"

    def RaspWeek(self):
        self.getWeek()
        self.setGroupAndSemestr()
        self.getRasp()
        week = int(self.week)
        ChetWeek = self.getChet(week)
        ListRaspWeek = []
        for elem in self.ListRasp:
            # Check that the current week falls into the period
            if int(elem[3]) <= week <= int(elem[4]):
                if ChetWeek == elem[5] or elem[5] == u"ежен":
                    ListRaspWeek.append(elem)
        logging.info("This week: %i" % len(ListRaspWeek))
        ListRaspWeek = sorted(ListRaspWeek, key=sortByDay)
        self.ListRaspWeek = ListRaspWeek
        self.PrintWeek()
        return ListRaspWeek

    def PrintWeek(self):
        """ Print the timetable to the screen """
        for elem in self.ListRaspWeek:
            print elem[0], elem[1], elem[2], elem[6], elem[7]
def test_post(self):
    g = Grab(url=SERVER.BASE_URL, debug_post=True, transport=GRAB_TRANSPORT)

    # Provide POST data in dict
    g.setup(post={'foo': 'bar'})
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'foo=bar')

    # Provide POST data in tuple
    g.setup(post=(('foo', 'TUPLE'),))
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'foo=TUPLE')

    # Provide POST data in list
    g.setup(post=[('foo', 'LIST')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'foo=LIST')

    # Order of elements should not be changed (1)
    g.setup(post=[('foo', 'LIST'), ('bar', 'BAR')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'foo=LIST&bar=BAR')

    # Order of elements should not be changed (2)
    g.setup(post=[('bar', 'BAR'), ('foo', 'LIST')])
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'bar=BAR&foo=LIST')

    # Provide POST data in byte-string
    g.setup(post='Hello world!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'Hello world!')

    # Provide POST data in unicode-string
    g.setup(post=u'Hello world!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'Hello world!')

    # Provide POST data in non-ascii unicode-string
    g.setup(post=u'Привет, мир!')
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'Привет, мир!')

    # Two values with one key
    g.setup(post=(('foo', 'bar'), ('foo', 'baz')))
    g.request()
    self.assertEqual(SERVER.REQUEST['post'], 'foo=bar&foo=baz')
RegionHref = region.select('.//a').attr('href')
for shop in g.doc.select('//div[contains(@class, "sale-group")]')[0].select(
        './/ul[@class="aside-links-menu"]')[iter].select('.//a'):
    shopStr = shopName + shop.text()
    print(shopStr)
    # Get the coordinates
    params = urllib.parse.urlencode({
        'geocode': shopStr,
        'key': 'ADBtnFgBAAAA1eTFSgIAyDrzmyVO9aGQNIKI-FDOBJ_SgDwAAAAAAAAAAAAaxBIO6vnCckEA_mCVDUalQYyhEA=='
    })
    url = GEOCODE_URL + params
    gNew = Grab()
    gNew.setup(url=url, log_file='out.xml')
    gNew.request()
    from xml.dom import minidom
    xmldoc = minidom.parse('out.xml')
    itemlist = xmldoc.getElementsByTagName('pos')
    print(len(itemlist))
    for item in itemlist:
        print(shopStr)
        print(item.firstChild.nodeValue)
iter += 1
word.destroy()
word = Tk()
word.title = ''
word.state = 'iconic'
# word.iconify()
# word.withdraw()
mword = word.selection_get()

g = Grab()
g.request(
    log_file="/tmp/pygdic.log",
    url="http://translate.google.com/translate_t?hl=ru&langpair=auto|ru&text=" + mword)
autp = g.css_text('span#result_box')
trancecript = g.css_text('div#src-translit')

outplen = len(autp) / dlstr
for i in xrange(outplen + 1):
    ostr += autp[dlstr * i:dlstr * (i + 1)] + "\n"

label = Label(word, width=dlstr + 2,
              text=ostr + "\n" + trancecript,
              font="Arial 14", bg="#ffffaa", fg="blue")
label.pack(expand=True)
word.mainloop()
print autp, trancecript
exit()