def test_3(self):
    """Check two levels of nested ``x`` selectors (articles -> hotkeys).

    The expected value is a one-element list whose ``articles`` entry is
    itself a list; several fields are expected to be ``null``
    (presumably because the fixture document lacks them).
    """
    hotkeys_spec = x('./hotkeys', hotkey='./hotkey/text()')
    articles_spec = x(
        './articles/article',
        id='./@id',
        title='./title/text()',
        url='./url/text()',
        hotkeys=hotkeys_spec,
    )
    result = self.g.doc.structure(
        '//issue',
        home_url='./home-url/text()',
        articles=articles_spec,
    )

    # Adjacent literals concatenate to exactly the original JSON text.
    expected = loads(
        ' [ { "articles": [ '
        '{ "url": "/article1", "id": "3", '
        '"hotkeys": [ { "hotkey": "language" } ], '
        '"title": "Issue overview" }, '
        '{ "url": "/article2", "id": null, '
        '"hotkeys": [ { "hotkey": null } ], '
        '"title": "Latest reviews" }, '
        '{ "url": null, "id": "4", '
        '"hotkeys": [ { "hotkey": null } ], '
        '"title": null } ], '
        '"home_url": "www.j.ru/issues/" } ] '
    )
    self.assertEqual(result, expected)
def test_3(self):
    """Extract every ``//issue`` with nested article and hotkey selectors.

    Only the extraction call is visible in this snippet; the value is
    bound to ``result`` and no assertion follows here.
    """
    hotkey_fields = x('./hotkeys', hotkey='./hotkey/text()')
    article_fields = x(
        './articles/article',
        id='./@id',
        title='./title/text()',
        url='./url/text()',
        hotkeys=hotkey_fields,
    )
    result = self.g.doc.structure(
        '//issue',
        home_url='./home-url/text()',
        articles=article_fields,
    )
def test_3(self):
    """Verify nested extraction of articles and their hotkeys per issue.

    The structure built from ``//issue`` is compared against a JSON
    fixture decoded with ``loads``.
    """
    # Keyword selectors for a single ``article`` node.
    article_kwargs = {
        'id': './@id',
        'title': './title/text()',
        'url': './url/text()',
        'hotkeys': x('./hotkeys', hotkey='./hotkey/text()'),
    }
    result = self.g.doc.structure(
        '//issue',
        home_url='./home-url/text()',
        articles=x('./articles/article', **article_kwargs),
    )

    # Pieces join into the exact same text the original literal held.
    expected_json = (
        ' [ { "articles": ['
        ' { "url": "/article1", "id": "3",'
        ' "hotkeys": [ { "hotkey": "language" } ],'
        ' "title": "Issue overview" },'
        ' { "url": "/article2", "id": null,'
        ' "hotkeys": [ { "hotkey": null } ],'
        ' "title": "Latest reviews" },'
        ' { "url": null, "id": "4",'
        ' "hotkeys": [ { "hotkey": null } ],'
        ' "title": null } ],'
        ' "home_url": "www.j.ru/issues/" } ] '
    )
    self.assertEqual(result, loads(expected_json))
def test_3(self):
    """Build the issue -> articles -> hotkeys structure from the document.

    This snippet stops after the extraction; ``result`` is assigned but
    not checked here.
    """
    # Specs are assembled innermost first so each level reads on its own.
    innermost = x('./hotkeys', hotkey='./hotkey/text()')
    per_article = x(
        './articles/article',
        id='./@id',
        title='./title/text()',
        url='./url/text()',
        hotkeys=innermost,
    )
    extract = self.g.doc.structure
    result = extract(
        '//issue',
        home_url='./home-url/text()',
        articles=per_article,
    )
def task_page(self, grab, task):
    """Persist the torrent described by a fetched page, or mark it removed.

    A response code other than 200 records ``task.pk`` in ``Removed`` and
    ends the handler. Otherwise the first ``detailsframe`` match is
    extracted, tagged with ``task.pk`` as its ``id`` and stored as a
    ``Torrent`` attached to a ``Category`` looked up by name.
    """
    if grab.response.code != 200:
        with database.transaction():
            Removed.create(id=task.pk)
        return

    # Fields read from the inner "details" element; passed positionally.
    details_spec = x(
        './*[@id="details"]',
        category='.//dt[.="Type:"]/following-sibling::dd/a/text()',
        size=(
            './/dt[.="Size:"]/following-sibling::dd/text()',
            converters.extract_integer,
        ),
        created=(
            './/dt[.="Uploaded:"]/following-sibling::dd/text()',
            converters.extract_datetime,
        ),
        hash=(
            './/dt[.="Info Hash:"]/following-sibling::dd',
            converters.extract_tail,
        ),
    )
    matches = grab.doc.structure(
        '//*[@id="detailsframe"]',
        details_spec,
        nfo=('//*[@class="nfo"]/pre', converters.extract_inner_html),
        magnet='//*[@class="download"]/a/@href',
        title='./*[@id="title"]/text()',
    )
    record = matches[0]
    record['id'] = task.pk

    with database.transaction():
        # "category" is not passed on as a raw field; it is replaced by
        # the value returned from Category.get_or_create.
        # NOTE(review): some peewee versions return an (instance, created)
        # tuple from get_or_create - confirm against the version in use.
        category_name = record.pop('category', '')
        category = Category.get_or_create(name=category_name)
        Torrent.create(category=category, **record)
def test_2(self):
    """Extract issues with a positional ``x`` selector and converters.

    Each field of the ``./detail`` selector is an ``(xpath, callable)``
    pair; the callable is presumably applied to the extracted text.
    Only the extraction is visible in this snippet.
    """
    def squeeze_whitespace(item):
        # Collapse every run of whitespace into a single space.
        return ' '.join(item.split())

    detail_spec = x(
        './detail',
        description=('./description/text()', squeeze_whitespace),
        detail_number=('./number/text()', int),
    )
    result = self.g.doc.structure(
        '//issue',
        detail_spec,
        title='./title/text()',
        date='./date/text()',
    )
def test_2(self):
    """Run ``structure`` over ``//issue`` with a positional detail selector.

    The snippet ends after the call; ``result`` is assigned without an
    assertion here.
    """
    # (xpath, callable) pairs for the two detail fields.
    description_field = (
        './description/text()',
        lambda item: ' '.join(item.split()),
    )
    number_field = ('./number/text()', int)

    detail_selector = x(
        './detail',
        description=description_field,
        detail_number=number_field,
    )
    result = self.g.doc.structure(
        '//issue',
        detail_selector,
        title='./title/text()',
        date='./date/text()',
    )
def test_2(self):
    """Check converters and a positional ``x`` selector.

    In the expected value the ``./detail`` fields sit at the same level
    as ``title`` and ``date``, ``detail_number`` is an integer and
    ``description`` has its whitespace collapsed.
    """
    def normalise(item):
        # Same as splitting on whitespace and re-joining with one space.
        words = item.split()
        return ' '.join(words)

    actual = self.g.doc.structure(
        '//issue',
        x(
            './detail',
            description=('./description/text()', normalise),
            detail_number=('./number/text()', int),
        ),
        title='./title/text()',
        date='./date/text()',
    )
    expected = loads(
        ' [ { "detail_number": 445, "date": "12.09.98",'
        ' "description": "issue 2 detail description",'
        ' "title": "XML today" } ] '
    )
    self.assertEqual(actual, expected)
def test_2(self):
    """Verify extraction with per-field converter callables.

    The result for ``//issue`` is compared with a JSON fixture holding a
    single flat object (detail fields alongside ``title`` and ``date``).
    """
    detail_spec = x(
        './detail',
        description=(
            './description/text()',
            lambda item: ' '.join(item.split()),
        ),
        detail_number=('./number/text()', int),
    )
    issue_kwargs = {
        'title': './title/text()',
        'date': './date/text()',
    }
    result = self.g.doc.structure('//issue', detail_spec, **issue_kwargs)

    # Adjacent literals reproduce the original JSON text exactly.
    expected_json = (
        ' [ { "detail_number": 445, '
        '"date": "12.09.98", '
        '"description": "issue 2 detail description", '
        '"title": "XML today" } ] '
    )
    self.assertEqual(result, loads(expected_json))
def task_seller_list(self, grab, task):
    """Handle one page of a seller's paginated app listing.

    Extracts every app thumbnail on the page, passes each through
    ``self.app_data_prepare``, hands the prepared list to
    ``self.store_apps_data`` and calls ``self.make_app_task`` for each
    app URL. When the page holds exactly ``self.PER_PAGE`` apps, the
    task for the following page (``task.page + 1``) is yielded.
    """
    listing = grab.doc.structure(
        '//*[@class="apps-thumb-list-p"]',
        x(
            './/*[@class="apps-con"]',
            cost='./em/text()',
            rating='.//*[starts-with(@class, "star")]/text()',
        ),
        url='.//a/@href',
        title='.//*[@class="apps-title"]/strong/text()',
        image='.//*[@class="apps-img-size03"]/img/@src',
    )

    # Build a real list: the result is stored, iterated again and
    # measured with len(). On Python 3 ``map`` returns a one-shot
    # iterator without len(), which would leave the loop below empty
    # and make the page-size check raise TypeError.
    apps = [self.app_data_prepare(app) for app in listing]
    self.store_apps_data(apps)

    for app in apps:
        # NOTE(review): the return value is discarded here, unlike
        # make_apps_task below whose result is yielded - presumably
        # make_app_task schedules the task itself; confirm.
        self.make_app_task(app['url'])

    # A full page suggests there may be another one after it.
    if len(apps) == self.PER_PAGE:
        yield self.make_apps_task(task.page + 1)
def task_detail(self, grab, task):
    """Parse an app detail page into a single nested dict.

    The dict carries ``image`` and ``name`` from the details-info
    element, a ``rating`` dict (``score``, ``reviews_num``, ``grades``)
    and a ``detail`` dict mapping each ``itemprop`` value to its text.

    TODO: the parsed data is not stored or returned yet; the original
    code had a disabled ``self.apps.append(data)`` at this point.
    """
    def remove_space(text):
        # Strip the non-breaking spaces found inside the number strings.
        return text.replace(u'\xa0', '')

    def get_text(node):
        return node.text_content().strip()

    def get_number(text):
        # Best effort: anything that does not parse as an integer is
        # returned unchanged.
        try:
            return int(remove_space(text))
        except ValueError:
            return text

    pages = grab.doc.structure(
        '//body',
        x(
            './/*[@class="details-info"]',
            image='.//*[@class="cover-image"]/@src',
            name=('.//*[@class="document-title"]', get_text),
        ),
        rating=x(
            './/*[@class="rating-box"]',
            score='.//*[@class="score"]/text()',
            reviews_num=('.//*[@class="reviews-num"]/text()', get_number),
            grades=x(
                './/*[@class="rating-histogram"]'
                '//*[starts-with(@class, "rating-bar-container ")]',
                grade=('.//*[@class="bar-label"]', get_text),
                count=('.//*[@class="bar-number"]/text()', get_number),
            ),
        ),
        detail=x(
            './/*[@class="details-section-contents"]//*[@class="content"]',
            type='./@itemprop',
            value='./text()',
        ),
    )

    # The code below subscripts the result with string keys, so it needs
    # a single dict rather than the whole result.
    # NOTE(review): assumes structure() returns a list with one dict per
    # matched node - confirm.
    data = pages[0]
    data['rating'] = data['rating'][0]

    # Turn the list of {type, value} entries into a type -> value dict.
    # Splitting the "numDownloads" range is intentionally not done here:
    # separators and number formatting differ between countries.
    data['detail'] = {
        item['type']: item['value'] for item in data['detail']
    }